init
This commit is contained in:
44
.gitignore
vendored
Normal file
44
.gitignore
vendored
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
HELP.md
|
||||||
|
.gradle
|
||||||
|
build/
|
||||||
|
!gradle/wrapper/gradle-wrapper.jar
|
||||||
|
!**/src/main/**/build/
|
||||||
|
!**/src/test/**/build/
|
||||||
|
|
||||||
|
### STS ###
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
bin/
|
||||||
|
!**/src/main/**/bin/
|
||||||
|
!**/src/test/**/bin/
|
||||||
|
|
||||||
|
### IntelliJ IDEA ###
|
||||||
|
.idea
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
out/
|
||||||
|
!**/src/main/**/out/
|
||||||
|
!**/src/test/**/out/
|
||||||
|
|
||||||
|
### NetBeans ###
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
|
||||||
|
### VS Code ###
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
### 업로드 파일 ###
|
||||||
|
uploads/
|
||||||
|
|
||||||
|
### 환경 설정 ###
|
||||||
|
.env
|
||||||
|
application-local.yml
|
||||||
11
.run/ragone.run.xml
Normal file
11
.run/ragone.run.xml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<!--
  Shared IntelliJ IDEA run configuration that launches the Spring Boot application.

  The JRE is intentionally not pinned here: an absolute ALTERNATIVE_JRE_PATH is valid
  on a single workstation only, so the run configuration falls back to the SDK that
  each developer has configured for the project/module.
-->
<component name="ProjectRunConfigurationManager">
  <configuration default="false" name="ragone" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
    <module name="ragone.main" />
    <option name="SPRING_BOOT_MAIN_CLASS" value="kr.co.ragone.RagoneApplication" />
    <method v="2">
      <option name="Make" enabled="true" />
    </method>
  </configuration>
</component>
|
||||||
53
build.gradle
Normal file
53
build.gradle
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// Gradle build for the RAGone Spring Boot backend.
plugins {
    id 'java'
    id 'org.springframework.boot' version '3.2.5'
    id 'io.spring.dependency-management' version '1.1.4'
}

group = 'kr.co'
version = '0.0.1-SNAPSHOT'

java {
    sourceCompatibility = '17'
}

configurations {
    // Make everything on the annotation-processor path visible at compile time as well.
    compileOnly {
        extendsFrom annotationProcessor
    }
}

repositories {
    mavenCentral()
}

dependencies {
    // Spring Boot
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.boot:spring-boot-starter-validation'

    // PostgreSQL + pgvector
    implementation 'org.postgresql:postgresql'
    implementation 'com.pgvector:pgvector:0.1.4'

    // OpenAI
    implementation 'com.theokanning.openai-gpt3-java:service:0.18.2'

    // Document parsing (PDF, DOCX, etc.)
    implementation 'org.apache.tika:tika-core:2.9.1'
    implementation 'org.apache.tika:tika-parsers-standard-package:2.9.1'

    // Utilities
    compileOnly 'org.projectlombok:lombok'
    annotationProcessor 'org.projectlombok:lombok'
    implementation 'org.mapstruct:mapstruct:1.5.5.Final'
    annotationProcessor 'org.mapstruct:mapstruct-processor:1.5.5.Final'
    // Required when Lombok (1.18.16+) and MapStruct run as annotation processors together;
    // without it MapStruct may not see the accessors Lombok generates.
    annotationProcessor 'org.projectlombok:lombok-mapstruct-binding:0.2.0'

    // Tests
    testImplementation 'org.springframework.boot:spring-boot-starter-test'
}

tasks.named('test') {
    useJUnitPlatform()
}
|
||||||
13
frontend/index.html
Normal file
13
frontend/index.html
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="ko">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<title>RAGone - AI 문서 질의응답</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="app"></div>
|
||||||
|
<script type="module" src="/src/main.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
23
frontend/package.json
Normal file
23
frontend/package.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"name": "ragone-frontend",
|
||||||
|
"version": "0.0.1",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "vite",
|
||||||
|
"build": "vite build",
|
||||||
|
"preview": "vite preview"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"vue": "^3.4.21",
|
||||||
|
"vue-router": "^4.3.0",
|
||||||
|
"axios": "^1.6.8",
|
||||||
|
"marked": "^12.0.1",
|
||||||
|
"highlight.js": "^11.9.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@vitejs/plugin-vue": "^5.0.4",
|
||||||
|
"vite": "^5.2.8",
|
||||||
|
"sass": "^1.72.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
15
frontend/vite.config.js
Normal file
15
frontend/vite.config.js
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import { defineConfig } from 'vite'
|
||||||
|
import vue from '@vitejs/plugin-vue'
|
||||||
|
|
||||||
|
// Port the Vite dev server listens on.
const DEV_SERVER_PORT = 3000

// Proxy rule applied to requests whose path starts with /api.
const apiProxy = {
  target: 'http://localhost:8080',
  changeOrigin: true
}

// Vite configuration: Vue plugin plus a dev server that proxies /api calls.
export default defineConfig({
  plugins: [vue()],
  server: {
    port: DEV_SERVER_PORT,
    proxy: { '/api': apiProxy }
  }
})
|
||||||
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
7
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
7
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
distributionBase=GRADLE_USER_HOME
|
||||||
|
distributionPath=wrapper/dists
|
||||||
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
|
||||||
|
networkTimeout=10000
|
||||||
|
validateDistributionUrl=true
|
||||||
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
zipStorePath=wrapper/dists
|
||||||
251
gradlew
vendored
Normal file
251
gradlew
vendored
Normal file
@@ -0,0 +1,251 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright © 2015 the original authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# Gradle start up script for POSIX generated by Gradle.
|
||||||
|
#
|
||||||
|
# Important for running:
|
||||||
|
#
|
||||||
|
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||||
|
# noncompliant, but you have some other compliant shell such as ksh or
|
||||||
|
# bash, then to run this script, type that shell name before the whole
|
||||||
|
# command line, like:
|
||||||
|
#
|
||||||
|
# ksh Gradle
|
||||||
|
#
|
||||||
|
# Busybox and similar reduced shells will NOT work, because this script
|
||||||
|
# requires all of these POSIX shell features:
|
||||||
|
# * functions;
|
||||||
|
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||||
|
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||||
|
# * compound commands having a testable exit status, especially «case»;
|
||||||
|
# * various built-in commands including «command», «set», and «ulimit».
|
||||||
|
#
|
||||||
|
# Important for patching:
|
||||||
|
#
|
||||||
|
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||||
|
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||||
|
#
|
||||||
|
# The "traditional" practice of packing multiple parameters into a
|
||||||
|
# space-separated string is a well documented source of bugs and security
|
||||||
|
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||||
|
# options in "$@", and eventually passing that to Java.
|
||||||
|
#
|
||||||
|
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||||
|
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||||
|
# see the in-line comments for details.
|
||||||
|
#
|
||||||
|
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||||
|
# Darwin, MinGW, and NonStop.
|
||||||
|
#
|
||||||
|
# (3) This script is generated from the Groovy template
|
||||||
|
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||||
|
# within the Gradle project.
|
||||||
|
#
|
||||||
|
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Attempt to set APP_HOME
|
||||||
|
|
||||||
|
# Resolve links: $0 may be a link
|
||||||
|
app_path=$0
|
||||||
|
|
||||||
|
# Need this for daisy-chained symlinks.
|
||||||
|
while
|
||||||
|
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||||
|
[ -h "$app_path" ]
|
||||||
|
do
|
||||||
|
ls=$( ls -ld "$app_path" )
|
||||||
|
link=${ls#*' -> '}
|
||||||
|
case $link in #(
|
||||||
|
/*) app_path=$link ;; #(
|
||||||
|
*) app_path=$APP_HOME$link ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# This is normally unused
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
APP_BASE_NAME=${0##*/}
|
||||||
|
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||||
|
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit
|
||||||
|
|
||||||
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||||
|
MAX_FD=maximum
|
||||||
|
|
||||||
|
# Print all arguments, joined into one line, on standard error.
# The redirection after the closing brace applies to the whole function body.
warn () {
|
||||||
|
echo "$*"
|
||||||
|
} >&2
|
||||||
|
|
||||||
|
# Print the arguments on standard error, framed by blank lines, then exit with status 1.
# The redirection after the closing brace applies to the whole function body.
die () {
|
||||||
|
echo
|
||||||
|
echo "$*"
|
||||||
|
echo
|
||||||
|
exit 1
|
||||||
|
} >&2
|
||||||
|
|
||||||
|
# OS specific support (must be 'true' or 'false').
|
||||||
|
cygwin=false
|
||||||
|
msys=false
|
||||||
|
darwin=false
|
||||||
|
nonstop=false
|
||||||
|
case "$( uname )" in #(
|
||||||
|
CYGWIN* ) cygwin=true ;; #(
|
||||||
|
Darwin* ) darwin=true ;; #(
|
||||||
|
MSYS* | MINGW* ) msys=true ;; #(
|
||||||
|
NONSTOP* ) nonstop=true ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
CLASSPATH="\\\"\\\""
|
||||||
|
|
||||||
|
|
||||||
|
# Determine the Java command to use to start the JVM.
|
||||||
|
if [ -n "$JAVA_HOME" ] ; then
|
||||||
|
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||||
|
# IBM's JDK on AIX uses strange locations for the executables
|
||||||
|
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||||
|
else
|
||||||
|
JAVACMD=$JAVA_HOME/bin/java
|
||||||
|
fi
|
||||||
|
if [ ! -x "$JAVACMD" ] ; then
|
||||||
|
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||||
|
|
||||||
|
Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
location of your Java installation."
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
JAVACMD=java
|
||||||
|
if ! command -v java >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||||
|
|
||||||
|
Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
location of your Java installation."
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Increase the maximum file descriptors if we can.
|
||||||
|
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||||
|
case $MAX_FD in #(
|
||||||
|
max*)
|
||||||
|
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||||
|
# shellcheck disable=SC2039,SC3045
|
||||||
|
MAX_FD=$( ulimit -H -n ) ||
|
||||||
|
warn "Could not query maximum file descriptor limit"
|
||||||
|
esac
|
||||||
|
case $MAX_FD in #(
|
||||||
|
'' | soft) :;; #(
|
||||||
|
*)
|
||||||
|
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||||
|
# shellcheck disable=SC2039,SC3045
|
||||||
|
ulimit -n "$MAX_FD" ||
|
||||||
|
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Collect all arguments for the java command, stacking in reverse order:
|
||||||
|
# * args from the command line
|
||||||
|
# * the main class name
|
||||||
|
# * -classpath
|
||||||
|
# * -D...appname settings
|
||||||
|
# * --module-path (only if needed)
|
||||||
|
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||||
|
|
||||||
|
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||||
|
if "$cygwin" || "$msys" ; then
|
||||||
|
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||||
|
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||||
|
|
||||||
|
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||||
|
|
||||||
|
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||||
|
for arg do
|
||||||
|
if
|
||||||
|
case $arg in #(
|
||||||
|
-*) false ;; # don't mess with options #(
|
||||||
|
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||||
|
[ -e "$t" ] ;; #(
|
||||||
|
*) false ;;
|
||||||
|
esac
|
||||||
|
then
|
||||||
|
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||||
|
fi
|
||||||
|
# Roll the args list around exactly as many times as the number of
|
||||||
|
# args, so each arg winds up back in the position where it started, but
|
||||||
|
# possibly modified.
|
||||||
|
#
|
||||||
|
# NB: a `for` loop captures its iteration list before it begins, so
|
||||||
|
# changing the positional parameters here affects neither the number of
|
||||||
|
# iterations, nor the values presented in `arg`.
|
||||||
|
shift # remove old arg
|
||||||
|
set -- "$@" "$arg" # push replacement arg
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
|
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||||
|
|
||||||
|
# Collect all arguments for the java command:
|
||||||
|
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
|
||||||
|
# and any embedded shellness will be escaped.
|
||||||
|
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
|
||||||
|
# treated as '${Hostname}' itself on the command line.
|
||||||
|
|
||||||
|
set -- \
|
||||||
|
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||||
|
-classpath "$CLASSPATH" \
|
||||||
|
-jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
|
||||||
|
"$@"
|
||||||
|
|
||||||
|
# Stop when "xargs" is not available.
|
||||||
|
if ! command -v xargs >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
die "xargs is not available"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Use "xargs" to parse quoted args.
|
||||||
|
#
|
||||||
|
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||||
|
#
|
||||||
|
# In Bash we could simply go:
|
||||||
|
#
|
||||||
|
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||||
|
# set -- "${ARGS[@]}" "$@"
|
||||||
|
#
|
||||||
|
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||||
|
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||||
|
# character that might be a shell metacharacter, then use eval to reverse
|
||||||
|
# that process (while maintaining the separation between arguments), and wrap
|
||||||
|
# the whole thing up as a single "set" statement.
|
||||||
|
#
|
||||||
|
# This will of course break if any of these variables contains a newline or
|
||||||
|
# an unmatched quote.
|
||||||
|
#
|
||||||
|
|
||||||
|
eval "set -- $(
|
||||||
|
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||||
|
xargs -n1 |
|
||||||
|
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||||
|
tr '\n' ' '
|
||||||
|
)" '"$@"'
|
||||||
|
|
||||||
|
exec "$JAVACMD" "$@"
|
||||||
90
gradlew.bat
vendored
Normal file
90
gradlew.bat
vendored
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
@rem
|
||||||
|
@rem Copyright 2015 the original author or authors.
|
||||||
|
@rem
|
||||||
|
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
@rem you may not use this file except in compliance with the License.
|
||||||
|
@rem You may obtain a copy of the License at
|
||||||
|
@rem
|
||||||
|
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
@rem
|
||||||
|
@rem Unless required by applicable law or agreed to in writing, software
|
||||||
|
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
@rem See the License for the specific language governing permissions and
|
||||||
|
@rem limitations under the License.
|
||||||
|
@rem
|
||||||
|
|
||||||
|
@if "%DEBUG%"=="" @echo off
|
||||||
|
@rem ##########################################################################
|
||||||
|
@rem
|
||||||
|
@rem Gradle startup script for Windows
|
||||||
|
@rem
|
||||||
|
@rem ##########################################################################
|
||||||
|
|
||||||
|
@rem Set local scope for the variables with windows NT shell
|
||||||
|
if "%OS%"=="Windows_NT" setlocal
|
||||||
|
|
||||||
|
set DIRNAME=%~dp0
|
||||||
|
if "%DIRNAME%"=="" set DIRNAME=.
|
||||||
|
@rem This is normally unused
|
||||||
|
set APP_BASE_NAME=%~n0
|
||||||
|
set APP_HOME=%DIRNAME%
|
||||||
|
|
||||||
|
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||||
|
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||||
|
|
||||||
|
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
|
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||||
|
|
||||||
|
@rem Find java.exe
|
||||||
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||||
|
|
||||||
|
set JAVA_EXE=java.exe
|
||||||
|
%JAVA_EXE% -version >NUL 2>&1
|
||||||
|
if %ERRORLEVEL% equ 0 goto execute
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||||
|
echo.
|
||||||
|
echo Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
echo location of your Java installation.
|
||||||
|
|
||||||
|
goto fail
|
||||||
|
|
||||||
|
:findJavaFromJavaHome
|
||||||
|
set JAVA_HOME=%JAVA_HOME:"=%
|
||||||
|
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||||
|
|
||||||
|
if exist "%JAVA_EXE%" goto execute
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||||
|
echo.
|
||||||
|
echo Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
echo location of your Java installation.
|
||||||
|
|
||||||
|
goto fail
|
||||||
|
|
||||||
|
:execute
|
||||||
|
@rem Setup the command line
|
||||||
|
|
||||||
|
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||||
|
|
||||||
|
|
||||||
|
@rem Execute Gradle
|
||||||
|
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||||
|
|
||||||
|
:end
|
||||||
|
@rem End local scope for the variables with windows NT shell
|
||||||
|
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||||
|
|
||||||
|
:fail
|
||||||
|
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||||
|
rem having the _script_ exit with the return code.
|
||||||
|
if %GRADLE_EXIT_CONSOLE% equ 1 exit %ERRORLEVEL%
|
||||||
|
exit /b %ERRORLEVEL%
|
||||||
|
|
||||||
|
:mainEnd
|
||||||
|
if "%OS%"=="Windows_NT" endlocal
|
||||||
|
|
||||||
|
:omega
|
||||||
1
settings.gradle
Normal file
1
settings.gradle
Normal file
@@ -0,0 +1 @@
|
|||||||
|
rootProject.name = 'ragone'
|
||||||
12
src/main/java/kr/co/ragone/RagoneApplication.java
Normal file
12
src/main/java/kr/co/ragone/RagoneApplication.java
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
package kr.co.ragone;
|
||||||
|
|
||||||
|
import org.springframework.boot.SpringApplication;
|
||||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
|
|
||||||
|
@SpringBootApplication
|
||||||
|
public class RagoneApplication {
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
SpringApplication.run(RagoneApplication.class, args);
|
||||||
|
}
|
||||||
|
}
|
||||||
10
src/main/java/kr/co/ragone/config/AsyncConfig.java
Normal file
10
src/main/java/kr/co/ragone/config/AsyncConfig.java
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package kr.co.ragone.config;
|
||||||
|
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.scheduling.annotation.EnableAsync;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableAsync
|
||||||
|
public class AsyncConfig {
|
||||||
|
// 비동기 처리 활성화
|
||||||
|
}
|
||||||
30
src/main/java/kr/co/ragone/config/CorsConfig.java
Normal file
30
src/main/java/kr/co/ragone/config/CorsConfig.java
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
package kr.co.ragone.config;
|
||||||
|
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.web.cors.CorsConfiguration;
|
||||||
|
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
|
||||||
|
import org.springframework.web.filter.CorsFilter;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
public class CorsConfig {
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public CorsFilter corsFilter() {
|
||||||
|
CorsConfiguration config = new CorsConfiguration();
|
||||||
|
config.setAllowCredentials(true);
|
||||||
|
config.setAllowedOrigins(Arrays.asList(
|
||||||
|
"http://localhost:3000",
|
||||||
|
"http://127.0.0.1:3000"
|
||||||
|
));
|
||||||
|
config.setAllowedHeaders(Arrays.asList("*"));
|
||||||
|
config.setAllowedMethods(Arrays.asList("GET", "POST", "PUT", "DELETE", "OPTIONS"));
|
||||||
|
|
||||||
|
UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource();
|
||||||
|
source.registerCorsConfiguration("/**", config);
|
||||||
|
|
||||||
|
return new CorsFilter(source);
|
||||||
|
}
|
||||||
|
}
|
||||||
20
src/main/java/kr/co/ragone/config/OpenAiConfig.java
Normal file
20
src/main/java/kr/co/ragone/config/OpenAiConfig.java
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
package kr.co.ragone.config;
|
||||||
|
|
||||||
|
import com.theokanning.openai.service.OpenAiService;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
public class OpenAiConfig {
|
||||||
|
|
||||||
|
@Value("${openai.api-key}")
|
||||||
|
private String apiKey;
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public OpenAiService openAiService() {
|
||||||
|
return new OpenAiService(apiKey, Duration.ofSeconds(60));
|
||||||
|
}
|
||||||
|
}
|
||||||
34
src/main/java/kr/co/ragone/controller/ChatController.java
Normal file
34
src/main/java/kr/co/ragone/controller/ChatController.java
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package kr.co.ragone.controller;
|
||||||
|
|
||||||
|
import kr.co.ragone.service.ChatService;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/chat")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ChatController {
|
||||||
|
|
||||||
|
private final ChatService chatService;
|
||||||
|
|
||||||
|
@PostMapping
|
||||||
|
public ResponseEntity<ChatService.RagResponse> chat(@RequestBody ChatRequest request) {
|
||||||
|
ChatService.RagResponse response = chatService.ask(
|
||||||
|
request.getQuestion(),
|
||||||
|
request.getTopicIds(),
|
||||||
|
request.getSessionKey()
|
||||||
|
);
|
||||||
|
return ResponseEntity.ok(response);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public static class ChatRequest {
|
||||||
|
private String question;
|
||||||
|
private List<Long> topicIds;
|
||||||
|
private String sessionKey; // 세션 키 (없으면 새로 생성)
|
||||||
|
}
|
||||||
|
}
|
||||||
150
src/main/java/kr/co/ragone/controller/DocumentController.java
Normal file
150
src/main/java/kr/co/ragone/controller/DocumentController.java
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
package kr.co.ragone.controller;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.DocInfo;
|
||||||
|
import kr.co.ragone.repository.DocInfoRepository;
|
||||||
|
import kr.co.ragone.service.DocumentIndexingService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.core.io.Resource;
|
||||||
|
import org.springframework.core.io.UrlResource;
|
||||||
|
import org.springframework.http.HttpHeaders;
|
||||||
|
import org.springframework.http.MediaType;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class DocumentController {
|
||||||
|
|
||||||
|
private final DocumentIndexingService documentIndexingService;
|
||||||
|
private final DocInfoRepository docInfoRepository;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문서 업로드 및 인덱싱
|
||||||
|
*/
|
||||||
|
@PostMapping("/topics/{topicId}/documents/upload")
|
||||||
|
public ResponseEntity<DocInfo> uploadDocument(
|
||||||
|
@PathVariable Long topicId,
|
||||||
|
@RequestParam("file") MultipartFile file) {
|
||||||
|
|
||||||
|
log.info("문서 업로드 요청: topicId={}, fileName={}", topicId, file.getOriginalFilename());
|
||||||
|
|
||||||
|
try {
|
||||||
|
DocInfo docInfo = documentIndexingService.uploadAndIndex(topicId, file);
|
||||||
|
return ResponseEntity.ok(docInfo);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("문서 업로드 실패", e);
|
||||||
|
return ResponseEntity.badRequest().build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 주제별 문서 목록 조회
|
||||||
|
*/
|
||||||
|
@GetMapping("/topics/{topicId}/documents")
|
||||||
|
public ResponseEntity<List<DocInfo>> getDocuments(@PathVariable Long topicId) {
|
||||||
|
List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
|
||||||
|
return ResponseEntity.ok(documents);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문서 상세 조회
|
||||||
|
*/
|
||||||
|
@GetMapping("/documents/{docId}")
|
||||||
|
public ResponseEntity<DocInfo> getDocument(@PathVariable Long docId) {
|
||||||
|
return docInfoRepository.findById(docId)
|
||||||
|
.map(ResponseEntity::ok)
|
||||||
|
.orElse(ResponseEntity.notFound().build());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문서 다운로드
|
||||||
|
*/
|
||||||
|
@GetMapping("/documents/{docId}/download")
|
||||||
|
public ResponseEntity<Resource> downloadDocument(@PathVariable Long docId) {
|
||||||
|
try {
|
||||||
|
DocInfo docInfo = docInfoRepository.findById(docId)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId));
|
||||||
|
|
||||||
|
Path filePath = Paths.get(docInfo.getFilePath());
|
||||||
|
Resource resource = new UrlResource(filePath.toUri());
|
||||||
|
|
||||||
|
if (!resource.exists()) {
|
||||||
|
log.error("파일이 존재하지 않습니다: {}", docInfo.getFilePath());
|
||||||
|
return ResponseEntity.notFound().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
// 파일명 인코딩 (한글 지원)
|
||||||
|
String encodedFileName = URLEncoder.encode(docInfo.getOriginalName(), StandardCharsets.UTF_8)
|
||||||
|
.replaceAll("\\+", "%20");
|
||||||
|
|
||||||
|
// Content-Type 결정
|
||||||
|
String contentType = getContentType(docInfo.getFileType());
|
||||||
|
|
||||||
|
return ResponseEntity.ok()
|
||||||
|
.contentType(MediaType.parseMediaType(contentType))
|
||||||
|
.header(HttpHeaders.CONTENT_DISPOSITION,
|
||||||
|
"attachment; filename=\"" + encodedFileName + "\"; filename*=UTF-8''" + encodedFileName)
|
||||||
|
.body(resource);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("문서 다운로드 실패", e);
|
||||||
|
return ResponseEntity.badRequest().build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문서 삭제
|
||||||
|
*/
|
||||||
|
@DeleteMapping("/documents/{docId}")
|
||||||
|
public ResponseEntity<Void> deleteDocument(@PathVariable Long docId) {
|
||||||
|
try {
|
||||||
|
documentIndexingService.deleteDocument(docId);
|
||||||
|
return ResponseEntity.ok().build();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("문서 삭제 실패", e);
|
||||||
|
return ResponseEntity.badRequest().build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 주제별 전체 문서 삭제
|
||||||
|
*/
|
||||||
|
@DeleteMapping("/topics/{topicId}/documents")
|
||||||
|
public ResponseEntity<Void> deleteAllDocuments(@PathVariable Long topicId) {
|
||||||
|
try {
|
||||||
|
documentIndexingService.deleteAllByTopic(topicId);
|
||||||
|
return ResponseEntity.ok().build();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("전체 문서 삭제 실패", e);
|
||||||
|
return ResponseEntity.badRequest().build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 파일 확장자에 따른 Content-Type 반환
|
||||||
|
*/
|
||||||
|
private String getContentType(String fileType) {
|
||||||
|
if (fileType == null) {
|
||||||
|
return "application/octet-stream";
|
||||||
|
}
|
||||||
|
return switch (fileType.toLowerCase()) {
|
||||||
|
case "pdf" -> "application/pdf";
|
||||||
|
case "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
|
||||||
|
case "doc" -> "application/msword";
|
||||||
|
case "txt" -> "text/plain; charset=UTF-8";
|
||||||
|
case "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
|
||||||
|
case "xls" -> "application/vnd.ms-excel";
|
||||||
|
case "hwp" -> "application/x-hwp";
|
||||||
|
default -> "application/octet-stream";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
57
src/main/java/kr/co/ragone/controller/TopicController.java
Normal file
57
src/main/java/kr/co/ragone/controller/TopicController.java
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
package kr.co.ragone.controller;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.TopicInfo;
|
||||||
|
import kr.co.ragone.repository.TopicInfoRepository;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/topics")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class TopicController {
|
||||||
|
|
||||||
|
private final TopicInfoRepository topicInfoRepository;
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public ResponseEntity<List<TopicInfo>> getTopics() {
|
||||||
|
List<TopicInfo> topics = topicInfoRepository.findByIsActiveTrue();
|
||||||
|
return ResponseEntity.ok(topics);
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/{topicId}")
|
||||||
|
public ResponseEntity<TopicInfo> getTopic(@PathVariable Long topicId) {
|
||||||
|
return topicInfoRepository.findById(topicId)
|
||||||
|
.map(ResponseEntity::ok)
|
||||||
|
.orElse(ResponseEntity.notFound().build());
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping
|
||||||
|
public ResponseEntity<TopicInfo> createTopic(@RequestBody TopicInfo topicInfo) {
|
||||||
|
TopicInfo saved = topicInfoRepository.save(topicInfo);
|
||||||
|
return ResponseEntity.ok(saved);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PutMapping("/{topicId}")
|
||||||
|
public ResponseEntity<TopicInfo> updateTopic(
|
||||||
|
@PathVariable Long topicId,
|
||||||
|
@RequestBody TopicInfo topicInfo) {
|
||||||
|
return topicInfoRepository.findById(topicId)
|
||||||
|
.map(existing -> {
|
||||||
|
existing.setTopicName(topicInfo.getTopicName());
|
||||||
|
existing.setTopicDesc(topicInfo.getTopicDesc());
|
||||||
|
existing.setTopicIcon(topicInfo.getTopicIcon());
|
||||||
|
existing.setIsActive(topicInfo.getIsActive());
|
||||||
|
return ResponseEntity.ok(topicInfoRepository.save(existing));
|
||||||
|
})
|
||||||
|
.orElse(ResponseEntity.notFound().build());
|
||||||
|
}
|
||||||
|
|
||||||
|
@DeleteMapping("/{topicId}")
|
||||||
|
public ResponseEntity<Void> deleteTopic(@PathVariable Long topicId) {
|
||||||
|
topicInfoRepository.deleteById(topicId);
|
||||||
|
return ResponseEntity.ok().build();
|
||||||
|
}
|
||||||
|
}
|
||||||
48
src/main/java/kr/co/ragone/domain/ChatMessage.java
Normal file
48
src/main/java/kr/co/ragone/domain/ChatMessage.java
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
package kr.co.ragone.domain;
|
||||||
|
|
||||||
|
import jakarta.persistence.*;
|
||||||
|
import lombok.*;
|
||||||
|
import org.hibernate.annotations.JdbcTypeCode;
|
||||||
|
import org.hibernate.type.SqlTypes;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(name = "TB_CHAT_MESSAGE")
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class ChatMessage {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||||
|
@Column(name = "msg_id")
|
||||||
|
private Long msgId;
|
||||||
|
|
||||||
|
@ManyToOne(fetch = FetchType.LAZY)
|
||||||
|
@JoinColumn(name = "session_id")
|
||||||
|
private ChatSession chatSession;
|
||||||
|
|
||||||
|
@Column(name = "msg_role", nullable = false, length = 20)
|
||||||
|
private String msgRole;
|
||||||
|
|
||||||
|
@Column(name = "msg_content", nullable = false, columnDefinition = "TEXT")
|
||||||
|
private String msgContent;
|
||||||
|
|
||||||
|
@Column(name = "topic_ids", columnDefinition = "BIGINT[]")
|
||||||
|
private Long[] topicIds;
|
||||||
|
|
||||||
|
@JdbcTypeCode(SqlTypes.JSON)
|
||||||
|
@Column(name = "source_refs", columnDefinition = "jsonb")
|
||||||
|
private List<Map<String, Object>> sourceRefs;
|
||||||
|
|
||||||
|
@Column(name = "token_count")
|
||||||
|
private Integer tokenCount;
|
||||||
|
|
||||||
|
@Column(name = "created_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime createdAt = LocalDateTime.now();
|
||||||
|
}
|
||||||
37
src/main/java/kr/co/ragone/domain/ChatSession.java
Normal file
37
src/main/java/kr/co/ragone/domain/ChatSession.java
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
package kr.co.ragone.domain;
|
||||||
|
|
||||||
|
import jakarta.persistence.*;
|
||||||
|
import lombok.*;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(name = "TB_CHAT_SESSION")
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class ChatSession {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||||
|
@Column(name = "session_id")
|
||||||
|
private Long sessionId;
|
||||||
|
|
||||||
|
@Column(name = "session_key", nullable = false, unique = true, length = 100)
|
||||||
|
private String sessionKey;
|
||||||
|
|
||||||
|
@Column(name = "session_title", length = 255)
|
||||||
|
private String sessionTitle;
|
||||||
|
|
||||||
|
@Column(name = "user_id", length = 100)
|
||||||
|
private String userId;
|
||||||
|
|
||||||
|
@Column(name = "created_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime createdAt = LocalDateTime.now();
|
||||||
|
|
||||||
|
@Column(name = "updated_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime updatedAt = LocalDateTime.now();
|
||||||
|
}
|
||||||
52
src/main/java/kr/co/ragone/domain/DocChunk.java
Normal file
52
src/main/java/kr/co/ragone/domain/DocChunk.java
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
package kr.co.ragone.domain;
|
||||||
|
|
||||||
|
import jakarta.persistence.*;
|
||||||
|
import lombok.*;
|
||||||
|
import org.hibernate.annotations.JdbcTypeCode;
|
||||||
|
import org.hibernate.type.SqlTypes;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(name = "TB_DOC_CHUNK")
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class DocChunk {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||||
|
@Column(name = "chunk_id")
|
||||||
|
private Long chunkId;
|
||||||
|
|
||||||
|
@ManyToOne(fetch = FetchType.LAZY)
|
||||||
|
@JoinColumn(name = "doc_id")
|
||||||
|
private DocInfo docInfo;
|
||||||
|
|
||||||
|
@ManyToOne(fetch = FetchType.LAZY)
|
||||||
|
@JoinColumn(name = "topic_id")
|
||||||
|
private TopicInfo topicInfo;
|
||||||
|
|
||||||
|
@Column(name = "chunk_content", nullable = false, columnDefinition = "TEXT")
|
||||||
|
private String chunkContent;
|
||||||
|
|
||||||
|
// pgvector는 Native Query로 처리
|
||||||
|
@Column(name = "chunk_embedding", columnDefinition = "vector(1536)")
|
||||||
|
private String chunkEmbedding;
|
||||||
|
|
||||||
|
@Column(name = "chunk_index")
|
||||||
|
private Integer chunkIndex;
|
||||||
|
|
||||||
|
@Column(name = "token_count")
|
||||||
|
private Integer tokenCount;
|
||||||
|
|
||||||
|
@JdbcTypeCode(SqlTypes.JSON)
|
||||||
|
@Column(name = "chunk_metadata", columnDefinition = "jsonb")
|
||||||
|
private Map<String, Object> chunkMetadata;
|
||||||
|
|
||||||
|
@Column(name = "created_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime createdAt = LocalDateTime.now();
|
||||||
|
}
|
||||||
60
src/main/java/kr/co/ragone/domain/DocInfo.java
Normal file
60
src/main/java/kr/co/ragone/domain/DocInfo.java
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
package kr.co.ragone.domain;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
import jakarta.persistence.*;
|
||||||
|
import lombok.*;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(name = "TB_DOC_INFO")
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class DocInfo {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||||
|
@Column(name = "doc_id")
|
||||||
|
private Long docId;
|
||||||
|
|
||||||
|
@ManyToOne(fetch = FetchType.LAZY)
|
||||||
|
@JoinColumn(name = "topic_id")
|
||||||
|
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
|
||||||
|
private TopicInfo topicInfo;
|
||||||
|
|
||||||
|
@Column(name = "file_name", nullable = false, length = 255)
|
||||||
|
private String fileName;
|
||||||
|
|
||||||
|
@Column(name = "original_name", length = 255)
|
||||||
|
private String originalName;
|
||||||
|
|
||||||
|
@Column(name = "file_path", length = 500)
|
||||||
|
private String filePath;
|
||||||
|
|
||||||
|
@Column(name = "file_size")
|
||||||
|
private Long fileSize;
|
||||||
|
|
||||||
|
@Column(name = "file_type", length = 50)
|
||||||
|
private String fileType;
|
||||||
|
|
||||||
|
@Column(name = "chunk_count")
|
||||||
|
@Builder.Default
|
||||||
|
private Integer chunkCount = 0;
|
||||||
|
|
||||||
|
@Column(name = "doc_status", length = 20)
|
||||||
|
@Builder.Default
|
||||||
|
private String docStatus = "PENDING";
|
||||||
|
|
||||||
|
@Column(name = "error_msg", columnDefinition = "TEXT")
|
||||||
|
private String errorMsg;
|
||||||
|
|
||||||
|
@Column(name = "created_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime createdAt = LocalDateTime.now();
|
||||||
|
|
||||||
|
@Column(name = "updated_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime updatedAt = LocalDateTime.now();
|
||||||
|
}
|
||||||
44
src/main/java/kr/co/ragone/domain/TopicInfo.java
Normal file
44
src/main/java/kr/co/ragone/domain/TopicInfo.java
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package kr.co.ragone.domain;
|
||||||
|
|
||||||
|
import jakarta.persistence.*;
|
||||||
|
import lombok.*;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(name = "TB_TOPIC_INFO")
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class TopicInfo {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||||
|
@Column(name = "topic_id")
|
||||||
|
private Long topicId;
|
||||||
|
|
||||||
|
@Column(name = "topic_code", nullable = false, unique = true, length = 50)
|
||||||
|
private String topicCode;
|
||||||
|
|
||||||
|
@Column(name = "topic_name", nullable = false, length = 100)
|
||||||
|
private String topicName;
|
||||||
|
|
||||||
|
@Column(name = "topic_desc", columnDefinition = "TEXT")
|
||||||
|
private String topicDesc;
|
||||||
|
|
||||||
|
@Column(name = "topic_icon", length = 50)
|
||||||
|
private String topicIcon;
|
||||||
|
|
||||||
|
@Column(name = "is_active")
|
||||||
|
@Builder.Default
|
||||||
|
private Boolean isActive = true;
|
||||||
|
|
||||||
|
@Column(name = "created_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime createdAt = LocalDateTime.now();
|
||||||
|
|
||||||
|
@Column(name = "updated_at")
|
||||||
|
@Builder.Default
|
||||||
|
private LocalDateTime updatedAt = LocalDateTime.now();
|
||||||
|
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
package kr.co.ragone.repository;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.ChatMessage;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public interface ChatMessageRepository extends JpaRepository<ChatMessage, Long> {
|
||||||
|
|
||||||
|
List<ChatMessage> findByChatSession_SessionIdOrderByCreatedAtAsc(Long sessionId);
|
||||||
|
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
package kr.co.ragone.repository;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.ChatSession;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public interface ChatSessionRepository extends JpaRepository<ChatSession, Long> {
|
||||||
|
|
||||||
|
Optional<ChatSession> findBySessionKey(String sessionKey);
|
||||||
|
}
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
package kr.co.ragone.repository;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.DocChunk;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.data.jpa.repository.Query;
|
||||||
|
import org.springframework.data.repository.query.Param;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public interface DocChunkRepository extends JpaRepository<DocChunk, Long> {
|
||||||
|
|
||||||
|
List<DocChunk> findByDocInfo_DocId(Long docId);
|
||||||
|
|
||||||
|
void deleteByDocInfo_DocId(Long docId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 벡터 유사도 검색 (전체 주제)
|
||||||
|
*/
|
||||||
|
@Query(value = """
|
||||||
|
SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content,
|
||||||
|
c.chunk_index, c.token_count, c.chunk_metadata, c.created_at,
|
||||||
|
1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity
|
||||||
|
FROM TB_DOC_CHUNK c
|
||||||
|
WHERE 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold
|
||||||
|
ORDER BY c.chunk_embedding <=> cast(:embedding as vector)
|
||||||
|
LIMIT :limit
|
||||||
|
""", nativeQuery = true)
|
||||||
|
List<Object[]> findSimilarChunks(
|
||||||
|
@Param("embedding") String embedding,
|
||||||
|
@Param("threshold") double threshold,
|
||||||
|
@Param("limit") int limit
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 벡터 유사도 검색 (특정 주제들)
|
||||||
|
*/
|
||||||
|
@Query(value = """
|
||||||
|
SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content,
|
||||||
|
c.chunk_index, c.token_count, c.chunk_metadata, c.created_at,
|
||||||
|
1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity
|
||||||
|
FROM TB_DOC_CHUNK c
|
||||||
|
WHERE c.topic_id = ANY(cast(:topicIds as BIGINT[]))
|
||||||
|
AND 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold
|
||||||
|
ORDER BY c.chunk_embedding <=> cast(:embedding as vector)
|
||||||
|
LIMIT :limit
|
||||||
|
""", nativeQuery = true)
|
||||||
|
List<Object[]> findSimilarChunksByTopics(
|
||||||
|
@Param("embedding") String embedding,
|
||||||
|
@Param("topicIds") Long[] topicIds,
|
||||||
|
@Param("threshold") double threshold,
|
||||||
|
@Param("limit") int limit
|
||||||
|
);
|
||||||
|
}
|
||||||
15
src/main/java/kr/co/ragone/repository/DocInfoRepository.java
Normal file
15
src/main/java/kr/co/ragone/repository/DocInfoRepository.java
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package kr.co.ragone.repository;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.DocInfo;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public interface DocInfoRepository extends JpaRepository<DocInfo, Long> {
|
||||||
|
|
||||||
|
List<DocInfo> findByTopicInfo_TopicId(Long topicId);
|
||||||
|
|
||||||
|
List<DocInfo> findByDocStatus(String docStatus);
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package kr.co.ragone.repository;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.TopicInfo;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public interface TopicInfoRepository extends JpaRepository<TopicInfo, Long> {
|
||||||
|
|
||||||
|
Optional<TopicInfo> findByTopicCode(String topicCode);
|
||||||
|
|
||||||
|
List<TopicInfo> findByIsActiveTrue();
|
||||||
|
}
|
||||||
275
src/main/java/kr/co/ragone/service/ChatService.java
Normal file
275
src/main/java/kr/co/ragone/service/ChatService.java
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
package kr.co.ragone.service;
|
||||||
|
|
||||||
|
import com.theokanning.openai.completion.chat.ChatCompletionRequest;
|
||||||
|
import com.theokanning.openai.completion.chat.ChatCompletionResult;
|
||||||
|
import com.theokanning.openai.completion.chat.ChatMessage;
|
||||||
|
import com.theokanning.openai.service.OpenAiService;
|
||||||
|
import kr.co.ragone.domain.ChatSession;
|
||||||
|
import kr.co.ragone.repository.ChatMessageRepository;
|
||||||
|
import kr.co.ragone.repository.ChatSessionRepository;
|
||||||
|
import kr.co.ragone.repository.DocChunkRepository;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ChatService {
|
||||||
|
|
||||||
|
private final OpenAiService openAiService;
|
||||||
|
private final EmbeddingService embeddingService;
|
||||||
|
private final DocChunkRepository docChunkRepository;
|
||||||
|
private final ChatSessionRepository chatSessionRepository;
|
||||||
|
private final ChatMessageRepository chatMessageRepository;
|
||||||
|
|
||||||
|
@Value("${openai.model.chat}")
|
||||||
|
private String chatModel;
|
||||||
|
|
||||||
|
@Value("${rag.retrieval.top-k}")
|
||||||
|
private int topK;
|
||||||
|
|
||||||
|
@Value("${rag.retrieval.similarity-threshold}")
|
||||||
|
private double similarityThreshold;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RAG 기반 질의응답 (세션 저장 포함)
|
||||||
|
*/
|
||||||
|
@Transactional
|
||||||
|
public RagResponse ask(String question, List<Long> topicIds, String sessionKey) {
|
||||||
|
// 1. 세션 조회 또는 생성
|
||||||
|
ChatSession session = getOrCreateSession(sessionKey, question);
|
||||||
|
|
||||||
|
// 2. 사용자 메시지 저장
|
||||||
|
saveMessage(session, "user", question, topicIds, null);
|
||||||
|
|
||||||
|
// 3. 질문 임베딩
|
||||||
|
String questionEmbedding = embeddingService.createEmbeddingAsString(question);
|
||||||
|
log.info("[RAG] Question: {}", question);
|
||||||
|
log.info("[RAG] TopicIds: {}", topicIds);
|
||||||
|
log.info("[RAG] Threshold: {}, TopK: {}", similarityThreshold, topK);
|
||||||
|
|
||||||
|
// 4. 유사 문서 검색
|
||||||
|
List<Object[]> chunks;
|
||||||
|
if (topicIds == null || topicIds.isEmpty()) {
|
||||||
|
log.info("[RAG] Searching ALL topics");
|
||||||
|
chunks = docChunkRepository.findSimilarChunks(
|
||||||
|
questionEmbedding, similarityThreshold, topK);
|
||||||
|
} else {
|
||||||
|
log.info("[RAG] Searching specific topics: {}", topicIds);
|
||||||
|
chunks = docChunkRepository.findSimilarChunksByTopics(
|
||||||
|
questionEmbedding, topicIds.toArray(new Long[0]),
|
||||||
|
similarityThreshold, topK);
|
||||||
|
}
|
||||||
|
log.info("[RAG] Found {} relevant chunks", chunks.size());
|
||||||
|
|
||||||
|
// 청크 내용 로깅
|
||||||
|
for (int i = 0; i < chunks.size(); i++) {
|
||||||
|
Object[] row = chunks.get(i);
|
||||||
|
String content = (String) row[3];
|
||||||
|
Double similarity = ((Number) row[8]).doubleValue();
|
||||||
|
log.info("[RAG] Chunk {}: similarity={}, content={}",
|
||||||
|
i + 1, String.format("%.3f", similarity),
|
||||||
|
content.substring(0, Math.min(100, content.length())));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. 컨텍스트 구성
|
||||||
|
String context = buildContext(chunks);
|
||||||
|
|
||||||
|
// 6. 프롬프트 구성 및 GPT 호출
|
||||||
|
String answer = generateAnswer(question, context, chunks.isEmpty());
|
||||||
|
|
||||||
|
// 7. AI 응답 메시지 저장
|
||||||
|
List<SourceInfo> sources = extractSources(chunks);
|
||||||
|
saveMessage(session, "assistant", answer, topicIds, sources);
|
||||||
|
|
||||||
|
// 8. 응답 구성
|
||||||
|
return RagResponse.builder()
|
||||||
|
.sessionKey(session.getSessionKey())
|
||||||
|
.answer(answer)
|
||||||
|
.sources(sources)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 세션 조회 또는 생성
|
||||||
|
*/
|
||||||
|
private ChatSession getOrCreateSession(String sessionKey, String firstQuestion) {
|
||||||
|
if (sessionKey != null && !sessionKey.isBlank()) {
|
||||||
|
return chatSessionRepository.findBySessionKey(sessionKey)
|
||||||
|
.orElseGet(() -> createSession(sessionKey, firstQuestion));
|
||||||
|
}
|
||||||
|
return createSession(UUID.randomUUID().toString(), firstQuestion);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 새 세션 생성
|
||||||
|
*/
|
||||||
|
private ChatSession createSession(String sessionKey, String title) {
|
||||||
|
String sessionTitle = title.length() > 50 ? title.substring(0, 50) + "..." : title;
|
||||||
|
|
||||||
|
ChatSession session = ChatSession.builder()
|
||||||
|
.sessionKey(sessionKey)
|
||||||
|
.sessionTitle(sessionTitle)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
return chatSessionRepository.save(session);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 메시지 저장
|
||||||
|
*/
|
||||||
|
private void saveMessage(ChatSession session, String role, String content,
|
||||||
|
List<Long> topicIds, List<SourceInfo> sources) {
|
||||||
|
kr.co.ragone.domain.ChatMessage message = kr.co.ragone.domain.ChatMessage.builder()
|
||||||
|
.chatSession(session)
|
||||||
|
.msgRole(role)
|
||||||
|
.msgContent(content)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
chatMessageRepository.save(message);
|
||||||
|
|
||||||
|
session.setUpdatedAt(LocalDateTime.now());
|
||||||
|
chatSessionRepository.save(session);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildContext(List<Object[]> chunks) {
|
||||||
|
if (chunks.isEmpty()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("=== 검색된 문서 내용 ===\n\n");
|
||||||
|
|
||||||
|
for (int i = 0; i < chunks.size(); i++) {
|
||||||
|
Object[] row = chunks.get(i);
|
||||||
|
String content = (String) row[3]; // chunk_content
|
||||||
|
Double similarity = ((Number) row[8]).doubleValue(); // similarity
|
||||||
|
|
||||||
|
sb.append(String.format("【문서 %d】 (관련도: %.0f%%)\n", i + 1, similarity * 100));
|
||||||
|
sb.append("─".repeat(40)).append("\n");
|
||||||
|
sb.append(content.trim());
|
||||||
|
sb.append("\n\n");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String generateAnswer(String question, String context, boolean noContext) {
|
||||||
|
String systemPrompt;
|
||||||
|
|
||||||
|
if (noContext) {
|
||||||
|
systemPrompt = """
|
||||||
|
당신은 친절한 문서 기반 질의응답 어시스턴트입니다.
|
||||||
|
현재 검색된 관련 문서가 없습니다.
|
||||||
|
|
||||||
|
사용자에게 다음을 안내해주세요:
|
||||||
|
1. 해당 질문과 관련된 문서가 시스템에 등록되어 있지 않을 수 있습니다.
|
||||||
|
2. 더 구체적인 키워드로 질문하면 도움이 될 수 있습니다.
|
||||||
|
3. 관리자에게 관련 문서 등록을 요청할 수 있습니다.
|
||||||
|
|
||||||
|
단, 일반적인 상식이나 공개된 정보로 답변 가능한 경우 도움을 드릴 수 있습니다.
|
||||||
|
""";
|
||||||
|
} else {
|
||||||
|
systemPrompt = """
|
||||||
|
당신은 전문적인 데이터 분석 및 문서 기반 질의응답 어시스턴트입니다.
|
||||||
|
|
||||||
|
【역할】
|
||||||
|
- 제공된 문서 내용을 깊이 있게 분석하여 답변합니다.
|
||||||
|
- 데이터를 요약, 비교, 분석하여 인사이트를 제공합니다.
|
||||||
|
- 사용자가 이해하기 쉽게 구조화된 답변을 합니다.
|
||||||
|
|
||||||
|
【답변 규칙】
|
||||||
|
1. 문서에 있는 정보를 최대한 활용하여 상세히 답변하세요.
|
||||||
|
2. 숫자, 날짜, 이름 등 구체적인 정보가 있으면 반드시 포함하세요.
|
||||||
|
3. 여러 문서의 정보를 종합하여 분석적인 답변을 제공하세요.
|
||||||
|
4. 표나 목록 형태로 정리하면 좋은 내용은 구조화하세요.
|
||||||
|
5. 문서에서 직접 확인되지 않는 내용은 추측하지 마세요.
|
||||||
|
6. 답변 마지막에 참고한 문서 번호를 명시하세요.
|
||||||
|
|
||||||
|
【분석 관점】
|
||||||
|
- 진행 상황, 진척률, 일정 관련 질문: 구체적인 수치와 기간을 제시
|
||||||
|
- 비교 질문: 표 형태로 비교 정리
|
||||||
|
- 요약 질문: 핵심 포인트를 불릿으로 정리
|
||||||
|
- 추세/변화 질문: 시간순 또는 단계별로 설명
|
||||||
|
""";
|
||||||
|
}
|
||||||
|
|
||||||
|
String userPrompt;
|
||||||
|
if (noContext) {
|
||||||
|
userPrompt = String.format("""
|
||||||
|
[질문]
|
||||||
|
%s
|
||||||
|
|
||||||
|
관련 문서를 찾을 수 없었습니다.
|
||||||
|
위 안내에 따라 사용자에게 도움이 되는 응답을 해주세요.
|
||||||
|
""", question);
|
||||||
|
} else {
|
||||||
|
userPrompt = String.format("""
|
||||||
|
%s
|
||||||
|
|
||||||
|
[질문]
|
||||||
|
%s
|
||||||
|
|
||||||
|
위 문서 내용을 분석하여 질문에 상세히 답변해주세요.
|
||||||
|
""", context, question);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<ChatMessage> messages = new ArrayList<>();
|
||||||
|
messages.add(new ChatMessage("system", systemPrompt));
|
||||||
|
messages.add(new ChatMessage("user", userPrompt));
|
||||||
|
|
||||||
|
ChatCompletionRequest request = ChatCompletionRequest.builder()
|
||||||
|
.model(chatModel)
|
||||||
|
.messages(messages)
|
||||||
|
.temperature(0.3)
|
||||||
|
.maxTokens(2000) // 더 긴 답변 허용
|
||||||
|
.build();
|
||||||
|
|
||||||
|
ChatCompletionResult result = openAiService.createChatCompletion(request);
|
||||||
|
|
||||||
|
return result.getChoices().get(0).getMessage().getContent();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<SourceInfo> extractSources(List<Object[]> chunks) {
|
||||||
|
return chunks.stream()
|
||||||
|
.map(row -> SourceInfo.builder()
|
||||||
|
.chunkId(((Number) row[0]).longValue())
|
||||||
|
.docId(((Number) row[1]).longValue())
|
||||||
|
.content(truncate((String) row[3], 150))
|
||||||
|
.similarity(((Number) row[8]).doubleValue())
|
||||||
|
.build())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private String truncate(String text, int maxLength) {
|
||||||
|
if (text.length() <= maxLength) {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
return text.substring(0, maxLength) + "...";
|
||||||
|
}
|
||||||
|
|
||||||
|
@lombok.Data
|
||||||
|
@lombok.Builder
|
||||||
|
public static class RagResponse {
|
||||||
|
private String sessionKey;
|
||||||
|
private String answer;
|
||||||
|
private List<SourceInfo> sources;
|
||||||
|
}
|
||||||
|
|
||||||
|
@lombok.Data
|
||||||
|
@lombok.Builder
|
||||||
|
public static class SourceInfo {
|
||||||
|
private Long chunkId;
|
||||||
|
private Long docId;
|
||||||
|
private String content;
|
||||||
|
private Double similarity;
|
||||||
|
}
|
||||||
|
}
|
||||||
202
src/main/java/kr/co/ragone/service/ChunkingService.java
Normal file
202
src/main/java/kr/co/ragone/service/ChunkingService.java
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
package kr.co.ragone.service;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
public class ChunkingService {
|
||||||
|
|
||||||
|
@Value("${rag.chunk.size:1000}")
|
||||||
|
private int chunkSize;
|
||||||
|
|
||||||
|
@Value("${rag.chunk.overlap:100}")
|
||||||
|
private int chunkOverlap;
|
||||||
|
|
||||||
|
// 최소 청크 크기 (이보다 작으면 단일 청크로)
|
||||||
|
private static final int MIN_CHUNK_SIZE = 50;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 텍스트를 청크로 분할
|
||||||
|
*/
|
||||||
|
public List<ChunkResult> chunkText(String text) {
|
||||||
|
List<ChunkResult> chunks = new ArrayList<>();
|
||||||
|
|
||||||
|
if (text == null || text.isBlank()) {
|
||||||
|
log.warn("빈 텍스트가 입력되었습니다.");
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 텍스트 정규화
|
||||||
|
text = normalizeText(text);
|
||||||
|
|
||||||
|
log.info("청킹 시작: 원본 {}자, 청크 크기 {}, 오버랩 {}",
|
||||||
|
text.length(), chunkSize, chunkOverlap);
|
||||||
|
|
||||||
|
// 텍스트가 짧으면 단일 청크로
|
||||||
|
if (text.length() <= chunkSize) {
|
||||||
|
chunks.add(createChunk(text, 0));
|
||||||
|
log.info("텍스트가 짧아 단일 청크로 생성: {}자", text.length());
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 문장 단위로 분할 후 청크 구성
|
||||||
|
List<String> sentences = splitIntoSentences(text);
|
||||||
|
log.debug("문장 {} 개로 분할됨", sentences.size());
|
||||||
|
|
||||||
|
StringBuilder currentChunk = new StringBuilder();
|
||||||
|
int chunkIndex = 0;
|
||||||
|
|
||||||
|
for (String sentence : sentences) {
|
||||||
|
// 현재 청크에 문장 추가 시 크기 초과하면 저장
|
||||||
|
if (currentChunk.length() + sentence.length() > chunkSize && currentChunk.length() >= MIN_CHUNK_SIZE) {
|
||||||
|
chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex++));
|
||||||
|
|
||||||
|
// 오버랩 처리
|
||||||
|
String overlap = getOverlapText(currentChunk.toString());
|
||||||
|
currentChunk = new StringBuilder(overlap);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentChunk.length() > 0 && !currentChunk.toString().endsWith(" ")) {
|
||||||
|
currentChunk.append(" ");
|
||||||
|
}
|
||||||
|
currentChunk.append(sentence.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
// 마지막 청크 저장
|
||||||
|
if (currentChunk.length() >= MIN_CHUNK_SIZE) {
|
||||||
|
chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex));
|
||||||
|
} else if (currentChunk.length() > 0 && !chunks.isEmpty()) {
|
||||||
|
// 너무 짧으면 이전 청크에 병합
|
||||||
|
ChunkResult lastChunk = chunks.get(chunks.size() - 1);
|
||||||
|
String merged = lastChunk.getContent() + " " + currentChunk.toString().trim();
|
||||||
|
chunks.set(chunks.size() - 1, createChunk(merged, lastChunk.getIndex()));
|
||||||
|
} else if (currentChunk.length() > 0) {
|
||||||
|
chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("청킹 완료: {} 청크 생성", chunks.size());
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 텍스트 정규화
|
||||||
|
*/
|
||||||
|
private String normalizeText(String text) {
|
||||||
|
return text
|
||||||
|
// 연속 공백 제거
|
||||||
|
.replaceAll("[ \\t]+", " ")
|
||||||
|
// 연속 줄바꿈 정리
|
||||||
|
.replaceAll("\\n{2,}", "\n\n")
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문장 단위로 분할
|
||||||
|
*/
|
||||||
|
private List<String> splitIntoSentences(String text) {
|
||||||
|
List<String> sentences = new ArrayList<>();
|
||||||
|
|
||||||
|
// 한국어/영어 문장 종결 패턴
|
||||||
|
// . ! ? 뒤에 공백이나 줄바꿈이 오는 경우
|
||||||
|
Pattern sentencePattern = Pattern.compile("(?<=[.!?。])\\s+|(?<=\\n)");
|
||||||
|
|
||||||
|
String[] parts = sentencePattern.split(text);
|
||||||
|
|
||||||
|
for (String part : parts) {
|
||||||
|
String trimmed = part.trim();
|
||||||
|
if (!trimmed.isEmpty()) {
|
||||||
|
sentences.add(trimmed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 문장 분할이 잘 안되면 (문장이 1개인 경우) 단어 수 기준으로 분할
|
||||||
|
if (sentences.size() <= 1 && text.length() > chunkSize) {
|
||||||
|
sentences = splitByWords(text, chunkSize / 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sentences;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 단어 수 기준으로 분할 (문장 분할 실패 시 폴백)
|
||||||
|
*/
|
||||||
|
private List<String> splitByWords(String text, int wordsPerChunk) {
|
||||||
|
List<String> chunks = new ArrayList<>();
|
||||||
|
String[] words = text.split("\\s+");
|
||||||
|
|
||||||
|
StringBuilder current = new StringBuilder();
|
||||||
|
int wordCount = 0;
|
||||||
|
|
||||||
|
for (String word : words) {
|
||||||
|
if (wordCount >= wordsPerChunk && current.length() > 0) {
|
||||||
|
chunks.add(current.toString().trim());
|
||||||
|
current = new StringBuilder();
|
||||||
|
wordCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current.length() > 0) {
|
||||||
|
current.append(" ");
|
||||||
|
}
|
||||||
|
current.append(word);
|
||||||
|
wordCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current.length() > 0) {
|
||||||
|
chunks.add(current.toString().trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ChunkResult createChunk(String content, int index) {
|
||||||
|
return ChunkResult.builder()
|
||||||
|
.content(content)
|
||||||
|
.index(index)
|
||||||
|
.tokenCount(estimateTokenCount(content))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getOverlapText(String text) {
|
||||||
|
if (text.length() <= chunkOverlap) {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
// 단어 경계에서 자르기
|
||||||
|
String overlap = text.substring(text.length() - chunkOverlap);
|
||||||
|
int spaceIndex = overlap.indexOf(' ');
|
||||||
|
if (spaceIndex > 0) {
|
||||||
|
overlap = overlap.substring(spaceIndex + 1);
|
||||||
|
}
|
||||||
|
return overlap;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int estimateTokenCount(String text) {
|
||||||
|
// 대략적인 토큰 수 추정
|
||||||
|
int koreanChars = 0;
|
||||||
|
int otherChars = 0;
|
||||||
|
|
||||||
|
for (char c : text.toCharArray()) {
|
||||||
|
if (Character.UnicodeScript.of(c) == Character.UnicodeScript.HANGUL) {
|
||||||
|
koreanChars++;
|
||||||
|
} else {
|
||||||
|
otherChars++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 한글은 약 1.5자당 1토큰, 영어는 4자당 1토큰
|
||||||
|
return (int) (koreanChars / 1.5 + otherChars / 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
@lombok.Data
|
||||||
|
@lombok.Builder
|
||||||
|
public static class ChunkResult {
|
||||||
|
private String content;
|
||||||
|
private int index;
|
||||||
|
private int tokenCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
235
src/main/java/kr/co/ragone/service/DocumentIndexingService.java
Normal file
235
src/main/java/kr/co/ragone/service/DocumentIndexingService.java
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
package kr.co.ragone.service;
|
||||||
|
|
||||||
|
import kr.co.ragone.domain.DocChunk;
|
||||||
|
import kr.co.ragone.domain.DocInfo;
|
||||||
|
import kr.co.ragone.domain.TopicInfo;
|
||||||
|
import kr.co.ragone.repository.DocChunkRepository;
|
||||||
|
import kr.co.ragone.repository.DocInfoRepository;
|
||||||
|
import kr.co.ragone.repository.TopicInfoRepository;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.scheduling.annotation.Async;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class DocumentIndexingService {
|
||||||
|
|
||||||
|
private final TopicInfoRepository topicInfoRepository;
|
||||||
|
private final DocInfoRepository docInfoRepository;
|
||||||
|
private final DocChunkRepository docChunkRepository;
|
||||||
|
private final DocumentParserService documentParserService;
|
||||||
|
private final ChunkingService chunkingService;
|
||||||
|
private final EmbeddingService embeddingService;
|
||||||
|
private final JdbcTemplate jdbcTemplate;
|
||||||
|
|
||||||
|
@Value("${file.upload-dir:./uploads}")
|
||||||
|
private String uploadDir;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문서 업로드 및 인덱싱
|
||||||
|
*/
|
||||||
|
@Transactional
|
||||||
|
public DocInfo uploadAndIndex(Long topicId, MultipartFile file) throws Exception {
|
||||||
|
// 1. 주제 확인
|
||||||
|
TopicInfo topicInfo = topicInfoRepository.findById(topicId)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("주제를 찾을 수 없습니다: " + topicId));
|
||||||
|
|
||||||
|
// 2. 파일 저장
|
||||||
|
String savedFileName = saveFile(file);
|
||||||
|
String filePath = Paths.get(uploadDir, savedFileName).toString();
|
||||||
|
|
||||||
|
// 3. 문서 정보 저장 (PROCESSING 상태)
|
||||||
|
DocInfo docInfo = DocInfo.builder()
|
||||||
|
.topicInfo(topicInfo)
|
||||||
|
.fileName(savedFileName)
|
||||||
|
.originalName(file.getOriginalFilename())
|
||||||
|
.filePath(filePath)
|
||||||
|
.fileSize(file.getSize())
|
||||||
|
.fileType(getFileExtension(file.getOriginalFilename()))
|
||||||
|
.docStatus("PROCESSING")
|
||||||
|
.build();
|
||||||
|
docInfo = docInfoRepository.save(docInfo);
|
||||||
|
|
||||||
|
// 4. 비동기로 인덱싱 처리
|
||||||
|
processIndexingAsync(docInfo.getDocId(), topicInfo, file);
|
||||||
|
|
||||||
|
return docInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 비동기 인덱싱 처리
|
||||||
|
*/
|
||||||
|
@Async
|
||||||
|
public void processIndexingAsync(Long docId, TopicInfo topicInfo, MultipartFile file) {
|
||||||
|
try {
|
||||||
|
processIndexing(docId, topicInfo, file);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("인덱싱 실패: docId={}", docId, e);
|
||||||
|
updateDocStatus(docId, "FAILED", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 실제 인덱싱 처리
|
||||||
|
*/
|
||||||
|
private void processIndexing(Long docId, TopicInfo topicInfo, MultipartFile file) throws Exception {
|
||||||
|
log.info("인덱싱 시작: docId={}, fileName={}", docId, file.getOriginalFilename());
|
||||||
|
|
||||||
|
// 1. 문서 파싱
|
||||||
|
String content = documentParserService.parseDocument(file);
|
||||||
|
if (content == null || content.isBlank()) {
|
||||||
|
throw new RuntimeException("문서 내용이 비어있습니다.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. 청킹
|
||||||
|
List<ChunkingService.ChunkResult> chunks = chunkingService.chunkText(content);
|
||||||
|
if (chunks.isEmpty()) {
|
||||||
|
throw new RuntimeException("청크 생성 실패");
|
||||||
|
}
|
||||||
|
log.info("청크 생성 완료: {} chunks", chunks.size());
|
||||||
|
|
||||||
|
// 3. 각 청크에 대해 임베딩 생성 및 저장
|
||||||
|
DocInfo docInfo = docInfoRepository.findById(docId)
|
||||||
|
.orElseThrow(() -> new RuntimeException("문서를 찾을 수 없습니다."));
|
||||||
|
|
||||||
|
for (ChunkingService.ChunkResult chunk : chunks) {
|
||||||
|
// 임베딩 생성
|
||||||
|
String embeddingVector = embeddingService.createEmbeddingAsString(chunk.getContent());
|
||||||
|
|
||||||
|
// Native Query로 벡터 저장
|
||||||
|
saveChunkWithEmbedding(docInfo, topicInfo, chunk, embeddingVector);
|
||||||
|
|
||||||
|
log.debug("청크 저장 완료: index={}", chunk.getIndex());
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. 문서 상태 업데이트
|
||||||
|
updateDocStatus(docId, "INDEXED", null);
|
||||||
|
updateChunkCount(docId, chunks.size());
|
||||||
|
|
||||||
|
log.info("인덱싱 완료: docId={}, chunks={}", docId, chunks.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 청크 + 벡터 저장 (Native Query 사용)
|
||||||
|
*/
|
||||||
|
private void saveChunkWithEmbedding(DocInfo docInfo, TopicInfo topicInfo,
|
||||||
|
ChunkingService.ChunkResult chunk, String embedding) {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO TB_DOC_CHUNK
|
||||||
|
(doc_id, topic_id, chunk_content, chunk_embedding, chunk_index, token_count, created_at)
|
||||||
|
VALUES (?, ?, ?, ?::vector, ?, ?, ?)
|
||||||
|
""";
|
||||||
|
|
||||||
|
jdbcTemplate.update(sql,
|
||||||
|
docInfo.getDocId(),
|
||||||
|
topicInfo.getTopicId(),
|
||||||
|
chunk.getContent(),
|
||||||
|
embedding,
|
||||||
|
chunk.getIndex(),
|
||||||
|
chunk.getTokenCount(),
|
||||||
|
LocalDateTime.now()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 파일 저장
|
||||||
|
*/
|
||||||
|
private String saveFile(MultipartFile file) throws IOException {
|
||||||
|
Path uploadPath = Paths.get(uploadDir);
|
||||||
|
if (!Files.exists(uploadPath)) {
|
||||||
|
Files.createDirectories(uploadPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
String originalFilename = file.getOriginalFilename();
|
||||||
|
String extension = getFileExtension(originalFilename);
|
||||||
|
String savedFileName = UUID.randomUUID().toString() + "." + extension;
|
||||||
|
|
||||||
|
Path filePath = uploadPath.resolve(savedFileName);
|
||||||
|
Files.copy(file.getInputStream(), filePath);
|
||||||
|
|
||||||
|
log.info("파일 저장: {}", filePath);
|
||||||
|
return savedFileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getFileExtension(String filename) {
|
||||||
|
if (filename == null) return "";
|
||||||
|
int lastDot = filename.lastIndexOf('.');
|
||||||
|
return lastDot > 0 ? filename.substring(lastDot + 1).toLowerCase() : "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateDocStatus(Long docId, String status, String errorMsg) {
|
||||||
|
docInfoRepository.findById(docId).ifPresent(doc -> {
|
||||||
|
doc.setDocStatus(status);
|
||||||
|
doc.setErrorMsg(errorMsg);
|
||||||
|
doc.setUpdatedAt(LocalDateTime.now());
|
||||||
|
docInfoRepository.save(doc);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateChunkCount(Long docId, int count) {
|
||||||
|
docInfoRepository.findById(docId).ifPresent(doc -> {
|
||||||
|
doc.setChunkCount(count);
|
||||||
|
doc.setUpdatedAt(LocalDateTime.now());
|
||||||
|
docInfoRepository.save(doc);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 문서 삭제 (청크 포함)
|
||||||
|
*/
|
||||||
|
@Transactional
|
||||||
|
public void deleteDocument(Long docId) {
|
||||||
|
DocInfo docInfo = docInfoRepository.findById(docId)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId));
|
||||||
|
|
||||||
|
// 파일 삭제
|
||||||
|
try {
|
||||||
|
Path filePath = Paths.get(docInfo.getFilePath());
|
||||||
|
Files.deleteIfExists(filePath);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.warn("파일 삭제 실패: {}", docInfo.getFilePath(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DB 삭제 (CASCADE로 청크도 함께 삭제됨)
|
||||||
|
docInfoRepository.delete(docInfo);
|
||||||
|
log.info("문서 삭제 완료: docId={}", docId);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 주제별 전체 문서 삭제
|
||||||
|
*/
|
||||||
|
@Transactional
|
||||||
|
public void deleteAllByTopic(Long topicId) {
|
||||||
|
List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
|
||||||
|
|
||||||
|
log.info("전체 문서 삭제 시작: topicId={}, count={}", topicId, documents.size());
|
||||||
|
|
||||||
|
for (DocInfo docInfo : documents) {
|
||||||
|
// 파일 삭제
|
||||||
|
try {
|
||||||
|
Path filePath = Paths.get(docInfo.getFilePath());
|
||||||
|
Files.deleteIfExists(filePath);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.warn("파일 삭제 실패: {}", docInfo.getFilePath(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DB 삭제 (CASCADE로 청크도 함께 삭제됨)
|
||||||
|
docInfoRepository.deleteAll(documents);
|
||||||
|
log.info("전체 문서 삭제 완료: topicId={}, count={}", topicId, documents.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
120
src/main/java/kr/co/ragone/service/DocumentParserService.java
Normal file
120
src/main/java/kr/co/ragone/service/DocumentParserService.java
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
package kr.co.ragone.service;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.tika.Tika;
|
||||||
|
import org.apache.tika.config.TikaConfig;
|
||||||
|
import org.apache.tika.exception.TikaException;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.parser.AutoDetectParser;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.apache.tika.parser.Parser;
|
||||||
|
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||||
|
import org.apache.tika.sax.BodyContentHandler;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
public class DocumentParserService {
|
||||||
|
|
||||||
|
private final Tika tika = new Tika();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 파일에서 텍스트 추출 (PDF, DOCX, TXT 등 지원)
|
||||||
|
*/
|
||||||
|
public String parseDocument(MultipartFile file) throws IOException, TikaException {
|
||||||
|
String filename = file.getOriginalFilename();
|
||||||
|
log.info("문서 파싱 시작: {}", filename);
|
||||||
|
|
||||||
|
try (InputStream inputStream = file.getInputStream()) {
|
||||||
|
String content;
|
||||||
|
|
||||||
|
// PDF인 경우 특별 처리
|
||||||
|
if (filename != null && filename.toLowerCase().endsWith(".pdf")) {
|
||||||
|
content = parsePdfWithOptions(inputStream);
|
||||||
|
} else {
|
||||||
|
content = tika.parseToString(inputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 텍스트 정제
|
||||||
|
content = cleanText(content);
|
||||||
|
|
||||||
|
log.info("문서 파싱 완료: {} chars", content.length());
|
||||||
|
|
||||||
|
// 텍스트가 너무 짧으면 경고
|
||||||
|
if (content.length() < 100) {
|
||||||
|
log.warn("⚠️ 추출된 텍스트가 매우 짧습니다. PDF가 이미지 기반일 수 있습니다.");
|
||||||
|
log.warn("원본 파일: {}, 추출 길이: {} chars", filename, content.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PDF 파싱 옵션 설정
|
||||||
|
*/
|
||||||
|
private String parsePdfWithOptions(InputStream inputStream) throws IOException, TikaException {
|
||||||
|
try {
|
||||||
|
// PDF 파서 설정
|
||||||
|
PDFParserConfig pdfConfig = new PDFParserConfig();
|
||||||
|
pdfConfig.setExtractInlineImages(true);
|
||||||
|
pdfConfig.setExtractUniqueInlineImagesOnly(true);
|
||||||
|
pdfConfig.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.AUTO); // OCR 자동 시도
|
||||||
|
|
||||||
|
ParseContext parseContext = new ParseContext();
|
||||||
|
parseContext.set(PDFParserConfig.class, pdfConfig);
|
||||||
|
|
||||||
|
// 파서 설정
|
||||||
|
Parser parser = new AutoDetectParser();
|
||||||
|
parseContext.set(Parser.class, parser);
|
||||||
|
|
||||||
|
// 메타데이터 및 콘텐츠 핸들러
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
|
BodyContentHandler handler = new BodyContentHandler(-1); // 무제한
|
||||||
|
|
||||||
|
parser.parse(inputStream, handler, metadata, parseContext);
|
||||||
|
|
||||||
|
// 메타데이터 로깅
|
||||||
|
log.debug("PDF 메타데이터:");
|
||||||
|
for (String name : metadata.names()) {
|
||||||
|
log.debug(" {}: {}", name, metadata.get(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
return handler.toString();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("PDF 파싱 실패, 기본 파싱으로 재시도", e);
|
||||||
|
// 기본 파싱으로 폴백
|
||||||
|
return tika.parseToString(inputStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 텍스트 정제
|
||||||
|
*/
|
||||||
|
private String cleanText(String text) {
|
||||||
|
if (text == null) return "";
|
||||||
|
|
||||||
|
return text
|
||||||
|
// 연속 공백을 단일 공백으로
|
||||||
|
.replaceAll("[ \\t]+", " ")
|
||||||
|
// 연속 줄바꿈을 2개로 제한
|
||||||
|
.replaceAll("\\n{3,}", "\n\n")
|
||||||
|
// 앞뒤 공백 제거
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 파일에서 텍스트 추출 (파일 경로로)
|
||||||
|
*/
|
||||||
|
public String parseDocument(java.io.File file) throws IOException, TikaException {
|
||||||
|
log.info("문서 파싱 시작: {}", file.getName());
|
||||||
|
String content = tika.parseToString(file);
|
||||||
|
content = cleanText(content);
|
||||||
|
log.info("문서 파싱 완료: {} chars", content.length());
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
}
|
||||||
55
src/main/java/kr/co/ragone/service/EmbeddingService.java
Normal file
55
src/main/java/kr/co/ragone/service/EmbeddingService.java
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
package kr.co.ragone.service;
|
||||||
|
|
||||||
|
import com.theokanning.openai.embedding.EmbeddingRequest;
|
||||||
|
import com.theokanning.openai.embedding.EmbeddingResult;
|
||||||
|
import com.theokanning.openai.service.OpenAiService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class EmbeddingService {
|
||||||
|
|
||||||
|
private final OpenAiService openAiService;
|
||||||
|
|
||||||
|
@Value("${openai.model.embedding}")
|
||||||
|
private String embeddingModel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 텍스트를 임베딩 벡터로 변환
|
||||||
|
*/
|
||||||
|
public List<Double> createEmbedding(String text) {
|
||||||
|
EmbeddingRequest request = EmbeddingRequest.builder()
|
||||||
|
.model(embeddingModel)
|
||||||
|
.input(Collections.singletonList(text))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
EmbeddingResult result = openAiService.createEmbeddings(request);
|
||||||
|
|
||||||
|
return result.getData().get(0).getEmbedding();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 벡터를 PostgreSQL vector 형식 문자열로 변환
|
||||||
|
*/
|
||||||
|
public String toVectorString(List<Double> embedding) {
|
||||||
|
return "[" + embedding.stream()
|
||||||
|
.map(String::valueOf)
|
||||||
|
.collect(Collectors.joining(",")) + "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 텍스트를 PostgreSQL vector 형식으로 직접 변환
|
||||||
|
*/
|
||||||
|
public String createEmbeddingAsString(String text) {
|
||||||
|
List<Double> embedding = createEmbedding(text);
|
||||||
|
return toVectorString(embedding);
|
||||||
|
}
|
||||||
|
}
|
||||||
54
src/main/resources/application.yml
Normal file
54
src/main/resources/application.yml
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
server:
  port: 8080

spring:
  application:
    name: ragone

  profiles:
    active: local

  datasource:
    url: jdbc:postgresql://172.25.0.79:5432/turbosoft_rag_db
    username: turbosoft
    # Do not commit the database password. Supply it via the DB_PASSWORD environment
    # variable, or override spring.datasource.password in the git-ignored
    # application-local.yml (the active "local" profile).
    password: ${DB_PASSWORD}
    driver-class-name: org.postgresql.Driver

  jpa:
    hibernate:
      ddl-auto: validate
    show-sql: true
    properties:
      hibernate:
        format_sql: true
        dialect: org.hibernate.dialect.PostgreSQLDialect

  servlet:
    multipart:
      max-file-size: 50MB
      max-request-size: 50MB

# OpenAI settings
openai:
  api-key: ${OPENAI_API_KEY:your-api-key-here}
  model:
    embedding: text-embedding-3-small
    chat: gpt-4o-mini

# RAG settings
rag:
  chunk:
    size: 1000
    overlap: 100
  retrieval:
    top-k: 10
    similarity-threshold: 0.3  # lowered further to widen the search range

# File storage path
file:
  upload-dir: ./uploads

logging:
  level:
    kr.co.ragone: DEBUG
    org.hibernate.SQL: DEBUG
|
||||||
Reference in New Issue
Block a user