This commit is contained in:
2025-12-11 02:09:57 +09:00
parent 6c80670b47
commit 8749de6aef
34 changed files with 2115 additions and 0 deletions

44
.gitignore vendored Normal file
View File

@@ -0,0 +1,44 @@
HELP.md
.gradle
build/
!gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
!**/src/main/**/bin/
!**/src/test/**/bin/
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
out/
!**/src/main/**/out/
!**/src/test/**/out/
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
### VS Code ###
.vscode/
### Uploaded files ###
uploads/
### Environment configuration ###
.env
application-local.yml

11
.run/ragone.run.xml Normal file
View File

@@ -0,0 +1,11 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ragone" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ALTERNATIVE_JRE_PATH" value="C:/Program Files/Java/jdk-21" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
<module name="ragone.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="kr.co.ragone.RagoneApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

53
build.gradle Normal file
View File

@@ -0,0 +1,53 @@
// Gradle build for the RAGone backend: a Spring Boot application compiled at Java 17 source level.
plugins {
id 'java'
id 'org.springframework.boot' version '3.2.5'
id 'io.spring.dependency-management' version '1.1.4'
}
group = 'kr.co'
version = '0.0.1-SNAPSHOT'
java {
sourceCompatibility = '17'
}
// Make everything declared as an annotation processor also visible on the compile-only classpath.
configurations {
compileOnly {
extendsFrom annotationProcessor
}
}
repositories {
mavenCentral()
}
dependencies {
// Spring Boot (versions without an explicit number are left to the dependency-management plugin)
implementation 'org.springframework.boot:spring-boot-starter-web'
implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
implementation 'org.springframework.boot:spring-boot-starter-validation'
// PostgreSQL + pgvector
implementation 'org.postgresql:postgresql'
implementation 'com.pgvector:pgvector:0.1.4'
// OpenAI
implementation 'com.theokanning.openai-gpt3-java:service:0.18.2'
// Document parsing (PDF, DOCX, etc.)
implementation 'org.apache.tika:tika-core:2.9.1'
implementation 'org.apache.tika:tika-parsers-standard-package:2.9.1'
// Utilities
// NOTE(review): Lombok and MapStruct are both annotation processors; if mappers are generated for
// Lombok-based classes, check whether 'lombok-mapstruct-binding' is also needed — TODO confirm.
compileOnly 'org.projectlombok:lombok'
annotationProcessor 'org.projectlombok:lombok'
implementation 'org.mapstruct:mapstruct:1.5.5.Final'
annotationProcessor 'org.mapstruct:mapstruct-processor:1.5.5.Final'
// Tests
testImplementation 'org.springframework.boot:spring-boot-starter-test'
}
// Run tests on the JUnit Platform.
tasks.named('test') {
useJUnitPlatform()
}

13
frontend/index.html Normal file
View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>RAGone - AI 문서 질의응답</title>
</head>
<body>
<div id="app"></div>
<script type="module" src="/src/main.js"></script>
</body>
</html>

23
frontend/package.json Normal file
View File

@@ -0,0 +1,23 @@
{
"name": "ragone-frontend",
"version": "0.0.1",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"dependencies": {
"vue": "^3.4.21",
"vue-router": "^4.3.0",
"axios": "^1.6.8",
"marked": "^12.0.1",
"highlight.js": "^11.9.0"
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.0.4",
"vite": "^5.2.8",
"sass": "^1.72.0"
}
}

15
frontend/vite.config.js Normal file
View File

@@ -0,0 +1,15 @@
// Vite configuration for the frontend: enables the Vue plugin and configures the dev server.
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'
export default defineConfig({
plugins: [vue()],
server: {
// The dev server listens on port 3000.
port: 3000,
proxy: {
// Requests whose path starts with /api are forwarded to the backend on localhost:8080.
'/api': {
target: 'http://localhost:8080',
// Present the target's origin in the Host header of proxied requests.
changeOrigin: true
}
}
}
})

BIN
gradle/wrapper/gradle-wrapper.jar vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

251
gradlew vendored Normal file
View File

@@ -0,0 +1,251 @@
#!/bin/sh
#
# Copyright © 2015 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
# Print all arguments as a single message on stderr (the whole function body is redirected with >&2).
warn () {
echo "$*"
} >&2
# Print the arguments on stderr, surrounded by blank lines, then terminate the script with exit status 1.
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH="\\\"\\\""
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
-jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"

90
gradlew.bat vendored Normal file
View File

@@ -0,0 +1,90 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem having the _script_ exit with the return code.
if %GRADLE_EXIT_CONSOLE% equ 1 exit %ERRORLEVEL%
exit /b %ERRORLEVEL%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

1
settings.gradle Normal file
View File

@@ -0,0 +1 @@
rootProject.name = 'ragone'

View File

@@ -0,0 +1,12 @@
package kr.co.ragone;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Bootstrap class of the RAGone backend.
 *
 * <p>{@code @SpringBootApplication} marks this class as the primary configuration source.
 */
@SpringBootApplication
public class RagoneApplication {

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Same launch sequence as the static SpringApplication.run(Class, String...) helper.
        SpringApplication application = new SpringApplication(RagoneApplication.class);
        application.run(args);
    }
}

View File

@@ -0,0 +1,10 @@
package kr.co.ragone.config;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
/**
 * Marker configuration whose only job is to carry {@code @EnableAsync}; it declares no beans of its own.
 */
@Configuration
@EnableAsync
public class AsyncConfig {
// Enables asynchronous processing
}

View File

@@ -0,0 +1,30 @@
package kr.co.ragone.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
import org.springframework.web.filter.CorsFilter;
import java.util.Arrays;
/**
 * Registers the application-wide CORS policy.
 */
@Configuration
public class CorsConfig {

    /** Origins that may call the API from a browser. */
    private static final String[] ALLOWED_ORIGINS = {
        "http://localhost:3000",
        "http://127.0.0.1:3000"
    };

    /** HTTP methods accepted on cross-origin requests. */
    private static final String[] ALLOWED_METHODS = {"GET", "POST", "PUT", "DELETE", "OPTIONS"};

    /**
     * Builds a {@link CorsFilter} that applies one policy to every path ({@code /**}):
     * credentials allowed, the origins and methods listed above, and any request header.
     *
     * @return the CORS filter bean
     */
    @Bean
    public CorsFilter corsFilter() {
        CorsConfiguration policy = new CorsConfiguration();
        policy.setAllowedOrigins(Arrays.asList(ALLOWED_ORIGINS));
        policy.setAllowedMethods(Arrays.asList(ALLOWED_METHODS));
        policy.setAllowedHeaders(Arrays.asList("*"));
        policy.setAllowCredentials(true);

        UrlBasedCorsConfigurationSource pathPolicies = new UrlBasedCorsConfigurationSource();
        pathPolicies.registerCorsConfiguration("/**", policy);
        return new CorsFilter(pathPolicies);
    }
}

View File

@@ -0,0 +1,20 @@
package kr.co.ragone.config;
import com.theokanning.openai.service.OpenAiService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.time.Duration;
/**
 * Provides the shared {@link OpenAiService} client bean.
 */
@Configuration
public class OpenAiConfig {

    /** Timeout, in seconds, handed to the {@link OpenAiService} constructor. */
    private static final long TIMEOUT_SECONDS = 60;

    /** API key read from the {@code openai.api-key} property. */
    @Value("${openai.api-key}")
    private String apiKey;

    /**
     * Creates the OpenAI client from the configured API key and the fixed timeout.
     *
     * @return the OpenAI service bean
     */
    @Bean
    public OpenAiService openAiService() {
        Duration timeout = Duration.ofSeconds(TIMEOUT_SECONDS);
        return new OpenAiService(apiKey, timeout);
    }
}

View File

@@ -0,0 +1,34 @@
package kr.co.ragone.controller;
import kr.co.ragone.service.ChatService;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoint that forwards user questions to {@link ChatService}.
 */
@RestController
@RequestMapping("/api/chat")
@RequiredArgsConstructor
public class ChatController {

    private final ChatService chatService;

    /**
     * Answers a question through the chat service.
     *
     * @param request question text, optional topic ids and optional session key
     * @return 200 with the service's response, or 400 when the question is missing or blank
     */
    @PostMapping
    public ResponseEntity<ChatService.RagResponse> chat(@RequestBody ChatRequest request) {
        String question = request.getQuestion();
        // Reject empty questions here instead of handing null/blank text to the service.
        if (question == null || question.isBlank()) {
            return ResponseEntity.badRequest().build();
        }

        ChatService.RagResponse response = chatService.ask(
                question,
                request.getTopicIds(),
                request.getSessionKey()
        );
        return ResponseEntity.ok(response);
    }

    /** JSON request body of {@code POST /api/chat}. */
    @Data
    public static class ChatRequest {
        private String question;
        private List<Long> topicIds;
        private String sessionKey; // session key (a new one is created when absent)
    }
}

View File

@@ -0,0 +1,150 @@
package kr.co.ragone.controller;
import kr.co.ragone.domain.DocInfo;
import kr.co.ragone.repository.DocInfoRepository;
import kr.co.ragone.service.DocumentIndexingService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
@Slf4j
@RestController
@RequestMapping("/api")
@RequiredArgsConstructor
public class DocumentController {
private final DocumentIndexingService documentIndexingService;
private final DocInfoRepository docInfoRepository;
/**
* 문서 업로드 및 인덱싱
*/
@PostMapping("/topics/{topicId}/documents/upload")
public ResponseEntity<DocInfo> uploadDocument(
@PathVariable Long topicId,
@RequestParam("file") MultipartFile file) {
log.info("문서 업로드 요청: topicId={}, fileName={}", topicId, file.getOriginalFilename());
try {
DocInfo docInfo = documentIndexingService.uploadAndIndex(topicId, file);
return ResponseEntity.ok(docInfo);
} catch (Exception e) {
log.error("문서 업로드 실패", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 주제별 문서 목록 조회
*/
@GetMapping("/topics/{topicId}/documents")
public ResponseEntity<List<DocInfo>> getDocuments(@PathVariable Long topicId) {
List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
return ResponseEntity.ok(documents);
}
/**
* 문서 상세 조회
*/
@GetMapping("/documents/{docId}")
public ResponseEntity<DocInfo> getDocument(@PathVariable Long docId) {
return docInfoRepository.findById(docId)
.map(ResponseEntity::ok)
.orElse(ResponseEntity.notFound().build());
}
/**
* 문서 다운로드
*/
@GetMapping("/documents/{docId}/download")
public ResponseEntity<Resource> downloadDocument(@PathVariable Long docId) {
try {
DocInfo docInfo = docInfoRepository.findById(docId)
.orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId));
Path filePath = Paths.get(docInfo.getFilePath());
Resource resource = new UrlResource(filePath.toUri());
if (!resource.exists()) {
log.error("파일이 존재하지 않습니다: {}", docInfo.getFilePath());
return ResponseEntity.notFound().build();
}
// 파일명 인코딩 (한글 지원)
String encodedFileName = URLEncoder.encode(docInfo.getOriginalName(), StandardCharsets.UTF_8)
.replaceAll("\\+", "%20");
// Content-Type 결정
String contentType = getContentType(docInfo.getFileType());
return ResponseEntity.ok()
.contentType(MediaType.parseMediaType(contentType))
.header(HttpHeaders.CONTENT_DISPOSITION,
"attachment; filename=\"" + encodedFileName + "\"; filename*=UTF-8''" + encodedFileName)
.body(resource);
} catch (Exception e) {
log.error("문서 다운로드 실패", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 문서 삭제
*/
@DeleteMapping("/documents/{docId}")
public ResponseEntity<Void> deleteDocument(@PathVariable Long docId) {
try {
documentIndexingService.deleteDocument(docId);
return ResponseEntity.ok().build();
} catch (Exception e) {
log.error("문서 삭제 실패", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 주제별 전체 문서 삭제
*/
@DeleteMapping("/topics/{topicId}/documents")
public ResponseEntity<Void> deleteAllDocuments(@PathVariable Long topicId) {
try {
documentIndexingService.deleteAllByTopic(topicId);
return ResponseEntity.ok().build();
} catch (Exception e) {
log.error("전체 문서 삭제 실패", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 파일 확장자에 따른 Content-Type 반환
*/
private String getContentType(String fileType) {
if (fileType == null) {
return "application/octet-stream";
}
return switch (fileType.toLowerCase()) {
case "pdf" -> "application/pdf";
case "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
case "doc" -> "application/msword";
case "txt" -> "text/plain; charset=UTF-8";
case "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
case "xls" -> "application/vnd.ms-excel";
case "hwp" -> "application/x-hwp";
default -> "application/octet-stream";
};
}
}

View File

@@ -0,0 +1,57 @@
package kr.co.ragone.controller;
import kr.co.ragone.domain.TopicInfo;
import kr.co.ragone.repository.TopicInfoRepository;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * CRUD endpoints for topics, backed directly by {@link TopicInfoRepository}.
 */
@RestController
@RequestMapping("/api/topics")
@RequiredArgsConstructor
public class TopicController {

    private final TopicInfoRepository topicInfoRepository;

    /**
     * Lists the active topics.
     *
     * @return 200 with every topic whose {@code isActive} flag is true
     */
    @GetMapping
    public ResponseEntity<List<TopicInfo>> getTopics() {
        List<TopicInfo> topics = topicInfoRepository.findByIsActiveTrue();
        return ResponseEntity.ok(topics);
    }

    /**
     * Returns a single topic.
     *
     * @param topicId id of the topic
     * @return 200 with the topic, or 404 when it does not exist
     */
    @GetMapping("/{topicId}")
    public ResponseEntity<TopicInfo> getTopic(@PathVariable Long topicId) {
        return topicInfoRepository.findById(topicId)
                .map(ResponseEntity::ok)
                .orElse(ResponseEntity.notFound().build());
    }

    /**
     * Creates a new topic.
     *
     * @param topicInfo topic data from the request body; any supplied id is ignored
     * @return 200 with the persisted topic, including its generated id
     */
    @PostMapping
    public ResponseEntity<TopicInfo> createTopic(@RequestBody TopicInfo topicInfo) {
        // The id is database-generated. Clearing a client-supplied value guarantees an insert;
        // with an id present, save() would treat the entity as existing and could overwrite a row.
        topicInfo.setTopicId(null);
        TopicInfo saved = topicInfoRepository.save(topicInfo);
        return ResponseEntity.ok(saved);
    }

    /**
     * Replaces the editable fields (name, description, icon, active flag) of an existing topic.
     *
     * @param topicId   id of the topic to update
     * @param topicInfo new field values
     * @return 200 with the updated topic, or 404 when it does not exist
     */
    @PutMapping("/{topicId}")
    public ResponseEntity<TopicInfo> updateTopic(
            @PathVariable Long topicId,
            @RequestBody TopicInfo topicInfo) {
        return topicInfoRepository.findById(topicId)
                .map(existing -> {
                    existing.setTopicName(topicInfo.getTopicName());
                    existing.setTopicDesc(topicInfo.getTopicDesc());
                    existing.setTopicIcon(topicInfo.getTopicIcon());
                    existing.setIsActive(topicInfo.getIsActive());
                    // The entity has no lifecycle callback, so the modification time is set here.
                    existing.setUpdatedAt(java.time.LocalDateTime.now());
                    return ResponseEntity.ok(topicInfoRepository.save(existing));
                })
                .orElse(ResponseEntity.notFound().build());
    }

    /**
     * Deletes a topic by id.
     *
     * @param topicId id of the topic
     * @return 200 once the repository call returns
     */
    @DeleteMapping("/{topicId}")
    public ResponseEntity<Void> deleteTopic(@PathVariable Long topicId) {
        topicInfoRepository.deleteById(topicId);
        return ResponseEntity.ok().build();
    }
}

View File

@@ -0,0 +1,48 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
/**
 * JPA entity for a row of {@code TB_CHAT_MESSAGE}: one message that belongs to a {@link ChatSession}.
 *
 * <p>Note: {@code @AllArgsConstructor} derives its parameter order from the field order below.
 */
@Entity
@Table(name = "TB_CHAT_MESSAGE")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ChatMessage {
// Primary key, generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "msg_id")
private Long msgId;
// Owning session, loaded lazily through the session_id join column.
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "session_id")
private ChatSession chatSession;
// Role of the message author; presumably values like "user"/"assistant" — TODO confirm against the writer.
@Column(name = "msg_role", nullable = false, length = 20)
private String msgRole;
// Message text (required).
@Column(name = "msg_content", nullable = false, columnDefinition = "TEXT")
private String msgContent;
// Topic ids associated with the message, stored in a BIGINT[] column.
@Column(name = "topic_ids", columnDefinition = "BIGINT[]")
private Long[] topicIds;
// Source references stored as jsonb; the keys inside each map are not defined here.
@JdbcTypeCode(SqlTypes.JSON)
@Column(name = "source_refs", columnDefinition = "jsonb")
private List<Map<String, Object>> sourceRefs;
// Token count for this message; which tokens are counted is decided by the writer — TODO confirm.
@Column(name = "token_count")
private Integer tokenCount;
// Initialised to the current local time when the object is constructed; @Builder.Default keeps
// that initialiser when the Lombok builder is used.
@Column(name = "created_at")
@Builder.Default
private LocalDateTime createdAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,37 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDateTime;
/**
 * JPA entity for a row of {@code TB_CHAT_SESSION}: a chat session identified by a unique session key.
 *
 * <p>Note: {@code @AllArgsConstructor} derives its parameter order from the field order below.
 */
@Entity
@Table(name = "TB_CHAT_SESSION")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ChatSession {
// Primary key, generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "session_id")
private Long sessionId;
// Externally visible session identifier; required and unique.
@Column(name = "session_key", nullable = false, unique = true, length = 100)
private String sessionKey;
// Optional title of the session.
@Column(name = "session_title", length = 255)
private String sessionTitle;
// Optional identifier of the owning user.
@Column(name = "user_id", length = 100)
private String userId;
// Initialised to the current local time when the object is constructed.
@Column(name = "created_at")
@Builder.Default
private LocalDateTime createdAt = LocalDateTime.now();
// Also initialised at construction; nothing in this class refreshes it, so writers must set it on change.
@Column(name = "updated_at")
@Builder.Default
private LocalDateTime updatedAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,52 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.time.LocalDateTime;
import java.util.Map;
/**
 * JPA entity for a row of {@code TB_DOC_CHUNK}: one text chunk of a document together with its
 * embedding column and metadata.
 *
 * <p>Note: {@code @AllArgsConstructor} derives its parameter order from the field order below.
 */
@Entity
@Table(name = "TB_DOC_CHUNK")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DocChunk {
// Primary key, generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "chunk_id")
private Long chunkId;
// Document this chunk was cut from, loaded lazily through doc_id.
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "doc_id")
private DocInfo docInfo;
// Topic of the chunk, loaded lazily through topic_id.
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "topic_id")
private TopicInfo topicInfo;
// Chunk text (required).
@Column(name = "chunk_content", nullable = false, columnDefinition = "TEXT")
private String chunkContent;
// The pgvector column is handled through native queries; it is declared as vector(1536) but
// mapped here as a plain String.
@Column(name = "chunk_embedding", columnDefinition = "vector(1536)")
private String chunkEmbedding;
// Position of the chunk; presumably its order within the source document — TODO confirm.
@Column(name = "chunk_index")
private Integer chunkIndex;
// Token count of the chunk.
@Column(name = "token_count")
private Integer tokenCount;
// Free-form metadata stored as jsonb; the keys are not defined here.
@JdbcTypeCode(SqlTypes.JSON)
@Column(name = "chunk_metadata", columnDefinition = "jsonb")
private Map<String, Object> chunkMetadata;
// Initialised to the current local time when the object is constructed.
@Column(name = "created_at")
@Builder.Default
private LocalDateTime createdAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,60 @@
package kr.co.ragone.domain;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDateTime;
/**
 * JPA entity for a row of {@code TB_DOC_INFO}: metadata of an uploaded document and its processing status.
 *
 * <p>Note: {@code @AllArgsConstructor} derives its parameter order from the field order below.
 */
@Entity
@Table(name = "TB_DOC_INFO")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DocInfo {
// Primary key, generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "doc_id")
private Long docId;
// Topic the document belongs to, loaded lazily. The two ignored properties are excluded from
// JSON serialisation; presumably they are the Hibernate proxy internals — TODO confirm.
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "topic_id")
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
private TopicInfo topicInfo;
// Stored file name (required).
@Column(name = "file_name", nullable = false, length = 255)
private String fileName;
// Optional file name as supplied by the uploader.
@Column(name = "original_name", length = 255)
private String originalName;
// Optional location of the stored file.
@Column(name = "file_path", length = 500)
private String filePath;
// File size; presumably in bytes — TODO confirm.
@Column(name = "file_size")
private Long fileSize;
// File type; presumably the extension without the dot (e.g. "pdf") — TODO confirm against the writer.
@Column(name = "file_type", length = 50)
private String fileType;
// Number of chunks produced for this document; starts at 0.
@Column(name = "chunk_count")
@Builder.Default
private Integer chunkCount = 0;
// Processing status; starts as "PENDING". Other values are set elsewhere.
@Column(name = "doc_status", length = 20)
@Builder.Default
private String docStatus = "PENDING";
// Optional error message text.
@Column(name = "error_msg", columnDefinition = "TEXT")
private String errorMsg;
// Initialised to the current local time when the object is constructed.
@Column(name = "created_at")
@Builder.Default
private LocalDateTime createdAt = LocalDateTime.now();
// Also initialised at construction; nothing in this class refreshes it, so writers must set it on change.
@Column(name = "updated_at")
@Builder.Default
private LocalDateTime updatedAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,44 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDateTime;
/**
 * JPA entity for a row of {@code TB_TOPIC_INFO}: a topic identified by a unique topic code.
 *
 * <p>Note: {@code @AllArgsConstructor} derives its parameter order from the field order below.
 */
@Entity
@Table(name = "TB_TOPIC_INFO")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class TopicInfo {
// Primary key, generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "topic_id")
private Long topicId;
// Code of the topic; required and unique.
@Column(name = "topic_code", nullable = false, unique = true, length = 50)
private String topicCode;
// Display name (required).
@Column(name = "topic_name", nullable = false, length = 100)
private String topicName;
// Optional free-text description.
@Column(name = "topic_desc", columnDefinition = "TEXT")
private String topicDesc;
// Optional icon identifier.
@Column(name = "topic_icon", length = 50)
private String topicIcon;
// Active flag, true by default.
@Column(name = "is_active")
@Builder.Default
private Boolean isActive = true;
// Initialised to the current local time when the object is constructed.
@Column(name = "created_at")
@Builder.Default
private LocalDateTime createdAt = LocalDateTime.now();
// Also initialised at construction; nothing in this class refreshes it, so writers must set it on change.
@Column(name = "updated_at")
@Builder.Default
private LocalDateTime updatedAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,13 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.ChatMessage;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data repository for {@link ChatMessage} entities.
 */
@Repository
public interface ChatMessageRepository extends JpaRepository<ChatMessage, Long> {
// Derived query: all messages of the given session, ordered by createdAt ascending.
List<ChatMessage> findByChatSession_SessionIdOrderByCreatedAtAsc(Long sessionId);
}

View File

@@ -0,0 +1,13 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.ChatSession;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.Optional;
/**
 * Spring Data repository for {@link ChatSession} entities.
 */
@Repository
public interface ChatSessionRepository extends JpaRepository<ChatSession, Long> {
// Derived query: the session with the given session key, if any.
Optional<ChatSession> findBySessionKey(String sessionKey);
}

View File

@@ -0,0 +1,55 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.DocChunk;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data repository for {@link DocChunk} entities, including native similarity-search queries.
 *
 * <p>Both search methods return raw rows. Each {@code Object[]} holds, in order: chunk_id, doc_id,
 * topic_id, chunk_content, chunk_index, token_count, chunk_metadata, created_at, similarity.
 */
@Repository
public interface DocChunkRepository extends JpaRepository<DocChunk, Long> {
// Derived query: all chunks that belong to the given document.
List<DocChunk> findByDocInfo_DocId(Long docId);
// Derived delete: removes all chunks of the given document.
// NOTE(review): derived delete methods need an active transaction — confirm callers are transactional.
void deleteByDocInfo_DocId(Long docId);
/**
 * Vector similarity search across all topics.
 *
 * <p>Similarity is computed as {@code 1 - (chunk_embedding <=> :embedding)}. Only rows whose
 * similarity is strictly greater than {@code threshold} are returned, closest first, at most
 * {@code limit} rows.
 *
 * @param embedding query embedding as text that is cast to {@code vector} in SQL
 * @param threshold exclusive lower bound on the similarity
 * @param limit maximum number of rows
 * @return matching rows in the column order described on the interface
 */
@Query(value = """
SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content,
c.chunk_index, c.token_count, c.chunk_metadata, c.created_at,
1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity
FROM TB_DOC_CHUNK c
WHERE 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold
ORDER BY c.chunk_embedding <=> cast(:embedding as vector)
LIMIT :limit
""", nativeQuery = true)
List<Object[]> findSimilarChunks(
@Param("embedding") String embedding,
@Param("threshold") double threshold,
@Param("limit") int limit
);
/**
 * Vector similarity search restricted to specific topics.
 *
 * <p>Same query as {@link #findSimilarChunks}, with an extra filter that keeps only chunks whose
 * topic_id is contained in {@code topicIds}.
 *
 * @param embedding query embedding as text that is cast to {@code vector} in SQL
 * @param topicIds topic ids to search in, cast to {@code BIGINT[]} in SQL
 * @param threshold exclusive lower bound on the similarity
 * @param limit maximum number of rows
 * @return matching rows in the column order described on the interface
 */
@Query(value = """
SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content,
c.chunk_index, c.token_count, c.chunk_metadata, c.created_at,
1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity
FROM TB_DOC_CHUNK c
WHERE c.topic_id = ANY(cast(:topicIds as BIGINT[]))
AND 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold
ORDER BY c.chunk_embedding <=> cast(:embedding as vector)
LIMIT :limit
""", nativeQuery = true)
List<Object[]> findSimilarChunksByTopics(
@Param("embedding") String embedding,
@Param("topicIds") Long[] topicIds,
@Param("threshold") double threshold,
@Param("limit") int limit
);
}

View File

@@ -0,0 +1,15 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.DocInfo;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data repository for {@link DocInfo} entities.
 */
@Repository
public interface DocInfoRepository extends JpaRepository<DocInfo, Long> {
// Derived query: all documents that belong to the given topic.
List<DocInfo> findByTopicInfo_TopicId(Long topicId);
// Derived query: all documents whose docStatus equals the given value.
List<DocInfo> findByDocStatus(String docStatus);
}

View File

@@ -0,0 +1,16 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.TopicInfo;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
/**
 * Spring Data JPA repository for {@link TopicInfo} entities.
 */
@Repository
public interface TopicInfoRepository extends JpaRepository<TopicInfo, Long> {
/** Derived query: the topic whose {@code topicCode} equals the given code, if any. */
Optional<TopicInfo> findByTopicCode(String topicCode);
/** Derived query: all topics whose {@code isActive} property is {@code true}. */
List<TopicInfo> findByIsActiveTrue();
}

View File

@@ -0,0 +1,275 @@
package kr.co.ragone.service;
import com.theokanning.openai.completion.chat.ChatCompletionRequest;
import com.theokanning.openai.completion.chat.ChatCompletionResult;
import com.theokanning.openai.completion.chat.ChatMessage;
import com.theokanning.openai.service.OpenAiService;
import kr.co.ragone.domain.ChatSession;
import kr.co.ragone.repository.ChatMessageRepository;
import kr.co.ragone.repository.ChatSessionRepository;
import kr.co.ragone.repository.DocChunkRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;
/**
 * Retrieval-augmented question answering.
 *
 * <p>For each question the service embeds the text, retrieves similar document chunks,
 * asks the chat model to answer from those chunks, and stores both the user message and
 * the assistant reply in a chat session.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ChatService {

    // Positions inside the Object[] rows returned by DocChunkRepository's native
    // similarity queries (chunk_id, doc_id, topic_id, chunk_content, ..., similarity).
    private static final int COL_CHUNK_ID = 0;
    private static final int COL_DOC_ID = 1;
    private static final int COL_CONTENT = 3;
    private static final int COL_SIMILARITY = 8;

    private static final int SESSION_TITLE_MAX_LENGTH = 50;
    private static final int LOG_PREVIEW_LENGTH = 100;
    private static final int SOURCE_PREVIEW_LENGTH = 150;

    // Visible rule between documents in the prompt context. The previous code repeated an
    // empty literal 40 times, which appended nothing at all.
    private static final String CONTEXT_SEPARATOR = "─".repeat(40);

    private final OpenAiService openAiService;
    private final EmbeddingService embeddingService;
    private final DocChunkRepository docChunkRepository;
    private final ChatSessionRepository chatSessionRepository;
    private final ChatMessageRepository chatMessageRepository;

    @Value("${openai.model.chat}")
    private String chatModel;

    @Value("${rag.retrieval.top-k}")
    private int topK;

    @Value("${rag.retrieval.similarity-threshold}")
    private double similarityThreshold;

    /**
     * Answers a question using RAG and records the exchange in a session.
     *
     * @param question   the user's question; must not be null or blank
     * @param topicIds   topics to restrict retrieval to; null or empty searches all topics
     * @param sessionKey existing session key, or null/blank to start a new session
     * @return the session key, the generated answer and the chunks used as sources
     * @throws IllegalArgumentException if {@code question} is null or blank
     * @throws IllegalStateException    if the chat model returns no choices
     */
    @Transactional
    public RagResponse ask(String question, List<Long> topicIds, String sessionKey) {
        if (question == null || question.isBlank()) {
            throw new IllegalArgumentException("question must not be null or blank");
        }

        // 1. Look up or create the session
        ChatSession session = getOrCreateSession(sessionKey, question);

        // 2. Store the user message
        saveMessage(session, "user", question, topicIds, null);

        // 3. Embed the question
        String questionEmbedding = embeddingService.createEmbeddingAsString(question);
        log.info("[RAG] Question: {}", question);
        log.info("[RAG] TopicIds: {}", topicIds);
        log.info("[RAG] Threshold: {}, TopK: {}", similarityThreshold, topK);

        // 4. Retrieve similar chunks
        List<Object[]> chunks = searchChunks(questionEmbedding, topicIds);
        log.info("[RAG] Found {} relevant chunks", chunks.size());
        logChunks(chunks);

        // 5. Build the context and 6. call the chat model
        String context = buildContext(chunks);
        String answer = generateAnswer(question, context, chunks.isEmpty());

        // 7. Store the assistant reply
        List<SourceInfo> sources = extractSources(chunks);
        saveMessage(session, "assistant", answer, topicIds, sources);

        // 8. Assemble the response
        return RagResponse.builder()
                .sessionKey(session.getSessionKey())
                .answer(answer)
                .sources(sources)
                .build();
    }

    /** Runs the similarity query, across all topics or restricted to {@code topicIds}. */
    private List<Object[]> searchChunks(String questionEmbedding, List<Long> topicIds) {
        if (topicIds == null || topicIds.isEmpty()) {
            log.info("[RAG] Searching ALL topics");
            return docChunkRepository.findSimilarChunks(
                    questionEmbedding, similarityThreshold, topK);
        }
        log.info("[RAG] Searching specific topics: {}", topicIds);
        return docChunkRepository.findSimilarChunksByTopics(
                questionEmbedding, topicIds.toArray(new Long[0]),
                similarityThreshold, topK);
    }

    /** Logs the similarity and a short content preview of every retrieved chunk. */
    private void logChunks(List<Object[]> chunks) {
        for (int i = 0; i < chunks.size(); i++) {
            Object[] row = chunks.get(i);
            String content = contentOf(row);
            log.info("[RAG] Chunk {}: similarity={}, content={}",
                    i + 1, String.format("%.3f", similarityOf(row)),
                    content.substring(0, Math.min(LOG_PREVIEW_LENGTH, content.length())));
        }
    }

    /** Returns the chunk text of a result row, or an empty string when the column is null. */
    private static String contentOf(Object[] row) {
        Object value = row[COL_CONTENT];
        return value == null ? "" : (String) value;
    }

    /** Returns the similarity value of a result row. */
    private static double similarityOf(Object[] row) {
        return ((Number) row[COL_SIMILARITY]).doubleValue();
    }

    /**
     * Finds the session for {@code sessionKey}, creating it (with that key) when it does not
     * exist; a null/blank key produces a new session with a random UUID key.
     */
    private ChatSession getOrCreateSession(String sessionKey, String firstQuestion) {
        if (sessionKey != null && !sessionKey.isBlank()) {
            return chatSessionRepository.findBySessionKey(sessionKey)
                    .orElseGet(() -> createSession(sessionKey, firstQuestion));
        }
        return createSession(UUID.randomUUID().toString(), firstQuestion);
    }

    /** Creates and saves a session whose title is the (possibly shortened) first question. */
    private ChatSession createSession(String sessionKey, String title) {
        ChatSession session = ChatSession.builder()
                .sessionKey(sessionKey)
                .sessionTitle(truncate(title, SESSION_TITLE_MAX_LENGTH))
                .build();
        return chatSessionRepository.save(session);
    }

    /**
     * Saves one message and refreshes the session's updated-at timestamp.
     * {@code topicIds} and {@code sources} are accepted but not written to the message here.
     */
    private void saveMessage(ChatSession session, String role, String content,
                             List<Long> topicIds, List<SourceInfo> sources) {
        kr.co.ragone.domain.ChatMessage message = kr.co.ragone.domain.ChatMessage.builder()
                .chatSession(session)
                .msgRole(role)
                .msgContent(content)
                .build();
        chatMessageRepository.save(message);

        session.setUpdatedAt(LocalDateTime.now());
        chatSessionRepository.save(session);
    }

    /** Formats the retrieved chunks as numbered documents for the prompt; "" when none. */
    private String buildContext(List<Object[]> chunks) {
        if (chunks.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        sb.append("=== 검색된 문서 내용 ===\n\n");
        for (int i = 0; i < chunks.size(); i++) {
            Object[] row = chunks.get(i);
            sb.append(String.format("【문서 %d】 (관련도: %.0f%%)\n", i + 1, similarityOf(row) * 100));
            sb.append(CONTEXT_SEPARATOR).append("\n");
            sb.append(contentOf(row).trim());
            sb.append("\n\n");
        }
        return sb.toString();
    }

    /**
     * Sends the system and user prompts to the chat model and returns the first choice.
     *
     * @throws IllegalStateException if the completion result contains no choices
     */
    private String generateAnswer(String question, String context, boolean noContext) {
        List<ChatMessage> messages = new ArrayList<>();
        messages.add(new ChatMessage("system", buildSystemPrompt(noContext)));
        messages.add(new ChatMessage("user", buildUserPrompt(question, context, noContext)));

        ChatCompletionRequest request = ChatCompletionRequest.builder()
                .model(chatModel)
                .messages(messages)
                .temperature(0.3)
                .maxTokens(2000) // allow longer answers
                .build();

        ChatCompletionResult result = openAiService.createChatCompletion(request);
        if (result.getChoices() == null || result.getChoices().isEmpty()) {
            throw new IllegalStateException("Chat completion returned no choices");
        }
        return result.getChoices().get(0).getMessage().getContent();
    }

    /** Selects the system prompt depending on whether any chunk was retrieved. */
    private String buildSystemPrompt(boolean noContext) {
        if (noContext) {
            return """
                    당신은 친절한 문서 기반 질의응답 어시스턴트입니다.
                    현재 검색된 관련 문서가 없습니다.
                    사용자에게 다음을 안내해주세요:
                    1. 해당 질문과 관련된 문서가 시스템에 등록되어 있지 않을 수 있습니다.
                    2. 더 구체적인 키워드로 질문하면 도움이 될 수 있습니다.
                    3. 관리자에게 관련 문서 등록을 요청할 수 있습니다.
                    단, 일반적인 상식이나 공개된 정보로 답변 가능한 경우 도움을 드릴 수 있습니다.
                    """;
        }
        return """
                당신은 전문적인 데이터 분석 및 문서 기반 질의응답 어시스턴트입니다.
                【역할】
                - 제공된 문서 내용을 깊이 있게 분석하여 답변합니다.
                - 데이터를 요약, 비교, 분석하여 인사이트를 제공합니다.
                - 사용자가 이해하기 쉽게 구조화된 답변을 합니다.
                【답변 규칙】
                1. 문서에 있는 정보를 최대한 활용하여 상세히 답변하세요.
                2. 숫자, 날짜, 이름 등 구체적인 정보가 있으면 반드시 포함하세요.
                3. 여러 문서의 정보를 종합하여 분석적인 답변을 제공하세요.
                4. 표나 목록 형태로 정리하면 좋은 내용은 구조화하세요.
                5. 문서에서 직접 확인되지 않는 내용은 추측하지 마세요.
                6. 답변 마지막에 참고한 문서 번호를 명시하세요.
                【분석 관점】
                - 진행 상황, 진척률, 일정 관련 질문: 구체적인 수치와 기간을 제시
                - 비교 질문: 표 형태로 비교 정리
                - 요약 질문: 핵심 포인트를 불릿으로 정리
                - 추세/변화 질문: 시간순 또는 단계별로 설명
                """;
    }

    /** Builds the user prompt; the retrieved context is included only when present. */
    private String buildUserPrompt(String question, String context, boolean noContext) {
        if (noContext) {
            return String.format("""
                    [질문]
                    %s
                    관련 문서를 찾을 수 없었습니다.
                    위 안내에 따라 사용자에게 도움이 되는 응답을 해주세요.
                    """, question);
        }
        return String.format("""
                %s
                [질문]
                %s
                위 문서 내용을 분석하여 질문에 상세히 답변해주세요.
                """, context, question);
    }

    /** Maps result rows to source descriptors with a shortened content preview. */
    private List<SourceInfo> extractSources(List<Object[]> chunks) {
        return chunks.stream()
                .map(row -> SourceInfo.builder()
                        .chunkId(((Number) row[COL_CHUNK_ID]).longValue())
                        .docId(((Number) row[COL_DOC_ID]).longValue())
                        .content(truncate(contentOf(row), SOURCE_PREVIEW_LENGTH))
                        .similarity(similarityOf(row))
                        .build())
                .collect(Collectors.toList());
    }

    /**
     * Shortens {@code text} to {@code maxLength} characters followed by "..." when it is
     * longer; null is treated as an empty string.
     */
    private String truncate(String text, int maxLength) {
        if (text == null) {
            return "";
        }
        if (text.length() <= maxLength) {
            return text;
        }
        return text.substring(0, maxLength) + "...";
    }

    /** Result of {@link #ask}: session key, generated answer and the chunks used. */
    @lombok.Data
    @lombok.Builder
    public static class RagResponse {
        private String sessionKey;
        private String answer;
        private List<SourceInfo> sources;
    }

    /** One retrieved chunk: its ids, a content preview and its similarity value. */
    @lombok.Data
    @lombok.Builder
    public static class SourceInfo {
        private Long chunkId;
        private Long docId;
        private String content;
        private Double similarity;
    }
}

View File

@@ -0,0 +1,202 @@
package kr.co.ragone.service;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Splits document text into overlapping chunks.
 *
 * <p>All sizes ({@code rag.chunk.size}, {@code rag.chunk.overlap}) are measured in
 * characters. The chunk size is a soft limit: a chunk may exceed it by up to the carried
 * overlap, because the overlap is prepended before the next sentence is appended.
 */
@Slf4j
@Service
public class ChunkingService {

    /** A trailing remainder shorter than this is merged into the previous chunk. */
    private static final int MIN_CHUNK_SIZE = 50;

    /** Whitespace that follows . ! ? or 。, or the position right after a newline. */
    private static final Pattern SENTENCE_BOUNDARY = Pattern.compile("(?<=[.!?。])\\s+|(?<=\\n)");

    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    @Value("${rag.chunk.size:1000}")
    private int chunkSize;

    @Value("${rag.chunk.overlap:100}")
    private int chunkOverlap;

    /**
     * Splits text into chunks.
     *
     * @param text raw document text; null or blank yields an empty list
     * @return chunks in document order with consecutive indexes starting at 0
     */
    public List<ChunkResult> chunkText(String text) {
        List<ChunkResult> chunks = new ArrayList<>();
        if (text == null || text.isBlank()) {
            log.warn("빈 텍스트가 입력되었습니다.");
            return chunks;
        }

        String normalized = normalizeText(text);
        log.info("청킹 시작: 원본 {}자, 청크 크기 {}, 오버랩 {}",
                normalized.length(), chunkSize, chunkOverlap);

        // Short text becomes a single chunk
        if (normalized.length() <= chunkSize) {
            chunks.add(createChunk(normalized, 0));
            log.info("텍스트가 짧아 단일 청크로 생성: {}자", normalized.length());
            return chunks;
        }

        List<String> sentences = splitIntoSentences(normalized);
        log.debug("문장 {} 개로 분할됨", sentences.size());

        StringBuilder currentChunk = new StringBuilder();
        // Length of the prefix of currentChunk that was copied from the previous chunk
        int carriedOverlap = 0;
        int chunkIndex = 0;

        for (String sentence : sentences) {
            // Flush when adding this sentence would exceed the chunk size
            if (currentChunk.length() + sentence.length() > chunkSize
                    && currentChunk.length() >= MIN_CHUNK_SIZE) {
                String finished = currentChunk.toString();
                chunks.add(createChunk(finished.trim(), chunkIndex++));

                String overlap = getOverlapText(finished);
                currentChunk = new StringBuilder(overlap);
                carriedOverlap = overlap.length();
            }
            if (currentChunk.length() > 0
                    && currentChunk.charAt(currentChunk.length() - 1) != ' ') {
                currentChunk.append(' ');
            }
            currentChunk.append(sentence.trim());
        }

        // Store the remainder
        if (currentChunk.length() >= MIN_CHUNK_SIZE) {
            chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex));
        } else if (currentChunk.length() > 0 && !chunks.isEmpty()) {
            // Too short to stand alone: merge into the previous chunk. Only the text after
            // the carried overlap is appended, because the overlap is already the tail of
            // that previous chunk and would otherwise be duplicated.
            ChunkResult lastChunk = chunks.get(chunks.size() - 1);
            String fresh = currentChunk.substring(carriedOverlap).trim();
            String merged = fresh.isEmpty()
                    ? lastChunk.getContent()
                    : lastChunk.getContent() + " " + fresh;
            chunks.set(chunks.size() - 1, createChunk(merged, lastChunk.getIndex()));
        } else if (currentChunk.length() > 0) {
            chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex));
        }

        log.info("청킹 완료: {} 청크 생성", chunks.size());
        return chunks;
    }

    /** Collapses runs of spaces/tabs to one space and runs of newlines to two newlines. */
    private String normalizeText(String text) {
        return text
                .replaceAll("[ \\t]+", " ")
                .replaceAll("\\n{2,}", "\n\n")
                .trim();
    }

    /**
     * Splits text at sentence boundaries. A sentence longer than the chunk size (for
     * example text without any sentence punctuation) is further split by character length
     * so that no single piece can blow up a chunk.
     */
    private List<String> splitIntoSentences(String text) {
        List<String> sentences = new ArrayList<>();
        for (String part : SENTENCE_BOUNDARY.split(text)) {
            String trimmed = part.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            if (trimmed.length() > chunkSize) {
                sentences.addAll(splitByLength(trimmed, chunkSize / 2));
            } else {
                sentences.add(trimmed);
            }
        }
        return sentences;
    }

    /**
     * Splits text on whitespace into pieces of at most {@code maxLength} characters.
     * A single unbroken run longer than the limit is cut hard at the limit.
     */
    private List<String> splitByLength(String text, int maxLength) {
        int limit = Math.max(1, maxLength); // guards against a zero/negative configuration
        List<String> pieces = new ArrayList<>();
        StringBuilder current = new StringBuilder();

        for (String word : WHITESPACE.split(text)) {
            String rest = word;
            while (rest.length() > limit) {
                if (current.length() > 0) {
                    pieces.add(current.toString());
                    current.setLength(0);
                }
                int cut = limit;
                // Do not cut between the two halves of a surrogate pair
                if (Character.isHighSurrogate(rest.charAt(cut - 1))) {
                    cut = cut > 1 ? cut - 1 : cut + 1;
                }
                pieces.add(rest.substring(0, cut));
                rest = rest.substring(cut);
            }
            if (rest.isEmpty()) {
                continue;
            }
            if (current.length() > 0 && current.length() + 1 + rest.length() > limit) {
                pieces.add(current.toString());
                current.setLength(0);
            }
            if (current.length() > 0) {
                current.append(' ');
            }
            current.append(rest);
        }
        if (current.length() > 0) {
            pieces.add(current.toString());
        }
        return pieces;
    }

    private ChunkResult createChunk(String content, int index) {
        return ChunkResult.builder()
                .content(content)
                .index(index)
                .tokenCount(estimateTokenCount(content))
                .build();
    }

    /**
     * Returns the tail of {@code text} to carry into the next chunk: the last
     * {@code chunkOverlap} characters, trimmed forward to the first word boundary.
     */
    private String getOverlapText(String text) {
        if (chunkOverlap <= 0) {
            return "";
        }
        if (text.length() <= chunkOverlap) {
            return text;
        }
        String overlap = text.substring(text.length() - chunkOverlap);
        int spaceIndex = overlap.indexOf(' ');
        return spaceIndex > 0 ? overlap.substring(spaceIndex + 1) : overlap;
    }

    /**
     * Rough token estimate: about 1.5 Hangul characters per token and 4 other characters
     * per token. Both terms are computed in floating point before the final truncation.
     */
    private int estimateTokenCount(String text) {
        int koreanChars = 0;
        int otherChars = 0;
        for (char c : text.toCharArray()) {
            if (Character.UnicodeScript.of(c) == Character.UnicodeScript.HANGUL) {
                koreanChars++;
            } else {
                otherChars++;
            }
        }
        return (int) (koreanChars / 1.5 + otherChars / 4.0);
    }

    /** One chunk: its text, its position in the document and an estimated token count. */
    @lombok.Data
    @lombok.Builder
    public static class ChunkResult {
        private String content;
        private int index;
        private int tokenCount;
    }
}

View File

@@ -0,0 +1,235 @@
package kr.co.ragone.service;
import kr.co.ragone.domain.DocChunk;
import kr.co.ragone.domain.DocInfo;
import kr.co.ragone.domain.TopicInfo;
import kr.co.ragone.repository.DocChunkRepository;
import kr.co.ragone.repository.DocInfoRepository;
import kr.co.ragone.repository.TopicInfoRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.List;
import java.util.UUID;
/**
 * Stores uploaded documents and indexes them for retrieval: the file is saved to disk,
 * parsed to text, split into chunks, and each chunk is inserted into
 * {@code TB_DOC_CHUNK} together with its embedding. Document status moves from
 * "PROCESSING" to "INDEXED", or to "FAILED" with an error message.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DocumentIndexingService {

    private final TopicInfoRepository topicInfoRepository;
    private final DocInfoRepository docInfoRepository;
    private final DocChunkRepository docChunkRepository;
    private final DocumentParserService documentParserService;
    private final ChunkingService chunkingService;
    private final EmbeddingService embeddingService;
    private final JdbcTemplate jdbcTemplate;

    @Value("${file.upload-dir:./uploads}")
    private String uploadDir;

    /**
     * Saves the uploaded file, records it as "PROCESSING" and starts indexing.
     *
     * @param topicId id of the topic the document belongs to
     * @param file    the uploaded file
     * @return the saved document record
     * @throws IllegalArgumentException if the topic does not exist
     * @throws IOException              if the file cannot be written to the upload directory
     */
    @Transactional
    public DocInfo uploadAndIndex(Long topicId, MultipartFile file) throws Exception {
        // 1. Check the topic
        TopicInfo topicInfo = topicInfoRepository.findById(topicId)
                .orElseThrow(() -> new IllegalArgumentException("주제를 찾을 수 없습니다: " + topicId));

        // 2. Store the file
        String savedFileName = saveFile(file);
        String filePath = Paths.get(uploadDir, savedFileName).toString();

        // 3. Record the document in PROCESSING state
        DocInfo docInfo = DocInfo.builder()
                .topicInfo(topicInfo)
                .fileName(savedFileName)
                .originalName(file.getOriginalFilename())
                .filePath(filePath)
                .fileSize(file.getSize())
                .fileType(getFileExtension(file.getOriginalFilename()))
                .docStatus("PROCESSING")
                .build();
        docInfo = docInfoRepository.save(docInfo);

        // 4. Index the document
        processIndexingAsync(docInfo.getDocId(), topicInfo, file);
        return docInfo;
    }

    /**
     * Runs indexing and marks the document "FAILED" (with the exception message) on error.
     *
     * <p>NOTE(review): {@link #uploadAndIndex} calls this method on {@code this}, so
     * Spring's proxy-based {@code @Async} is presumably bypassed and the work runs
     * synchronously inside the caller's transaction - confirm. Making it truly
     * asynchronous would also require the upload stream to outlive the request.
     */
    @Async
    public void processIndexingAsync(Long docId, TopicInfo topicInfo, MultipartFile file) {
        try {
            processIndexing(docId, topicInfo, file);
        } catch (Exception e) {
            log.error("인덱싱 실패: docId={}", docId, e);
            updateDocStatus(docId, "FAILED", e.getMessage());
        }
    }

    /** Parses, chunks, embeds and stores the document, then marks it "INDEXED". */
    private void processIndexing(Long docId, TopicInfo topicInfo, MultipartFile file) throws Exception {
        log.info("인덱싱 시작: docId={}, fileName={}", docId, file.getOriginalFilename());

        // 1. Parse the document
        String content = documentParserService.parseDocument(file);
        if (content == null || content.isBlank()) {
            throw new IllegalStateException("문서 내용이 비어있습니다.");
        }

        // 2. Chunk the text
        List<ChunkingService.ChunkResult> chunks = chunkingService.chunkText(content);
        if (chunks.isEmpty()) {
            throw new IllegalStateException("청크 생성 실패");
        }
        log.info("청크 생성 완료: {} chunks", chunks.size());

        // 3. Embed and store every chunk
        DocInfo docInfo = docInfoRepository.findById(docId)
                .orElseThrow(() -> new IllegalStateException("문서를 찾을 수 없습니다."));
        for (ChunkingService.ChunkResult chunk : chunks) {
            String embeddingVector = embeddingService.createEmbeddingAsString(chunk.getContent());
            saveChunkWithEmbedding(docInfo, topicInfo, chunk, embeddingVector);
            log.debug("청크 저장 완료: index={}", chunk.getIndex());
        }

        // 4. Update the document status
        updateDocStatus(docId, "INDEXED", null);
        updateChunkCount(docId, chunks.size());
        log.info("인덱싱 완료: docId={}, chunks={}", docId, chunks.size());
    }

    /**
     * Inserts one chunk with its embedding through JDBC; the embedding text is cast to
     * {@code vector} inside the statement.
     */
    private void saveChunkWithEmbedding(DocInfo docInfo, TopicInfo topicInfo,
                                        ChunkingService.ChunkResult chunk, String embedding) {
        String sql = """
                INSERT INTO TB_DOC_CHUNK
                (doc_id, topic_id, chunk_content, chunk_embedding, chunk_index, token_count, created_at)
                VALUES (?, ?, ?, ?::vector, ?, ?, ?)
                """;
        jdbcTemplate.update(sql,
                docInfo.getDocId(),
                topicInfo.getTopicId(),
                chunk.getContent(),
                embedding,
                chunk.getIndex(),
                chunk.getTokenCount(),
                LocalDateTime.now()
        );
    }

    /**
     * Copies the upload into the upload directory under a random UUID name that keeps the
     * (sanitised) extension, and returns that file name.
     */
    private String saveFile(MultipartFile file) throws IOException {
        Path uploadPath = Paths.get(uploadDir);
        if (!Files.exists(uploadPath)) {
            Files.createDirectories(uploadPath);
        }

        String extension = getFileExtension(file.getOriginalFilename());
        String baseName = UUID.randomUUID().toString();
        // No trailing dot when the upload has no usable extension
        String savedFileName = extension.isEmpty() ? baseName : baseName + "." + extension;

        Path filePath = uploadPath.resolve(savedFileName);
        // Close the upload stream once copied; the caller re-opens it for parsing
        try (java.io.InputStream in = file.getInputStream()) {
            Files.copy(in, filePath);
        }
        log.info("파일 저장: {}", filePath);
        return savedFileName;
    }

    /**
     * Returns the lower-cased extension after the last dot, or "" when there is none.
     * The name comes from the client, so an extension containing anything other than
     * letters or digits (for example a path separator) is rejected and "" is returned.
     */
    private String getFileExtension(String filename) {
        if (filename == null) {
            return "";
        }
        int lastDot = filename.lastIndexOf('.');
        if (lastDot <= 0) {
            return "";
        }
        String extension = filename.substring(lastDot + 1).toLowerCase(java.util.Locale.ROOT);
        return extension.chars().allMatch(Character::isLetterOrDigit) ? extension : "";
    }

    /** Sets status, error message and updated-at on the document, if it exists. */
    private void updateDocStatus(Long docId, String status, String errorMsg) {
        docInfoRepository.findById(docId).ifPresent(doc -> {
            doc.setDocStatus(status);
            doc.setErrorMsg(errorMsg);
            doc.setUpdatedAt(LocalDateTime.now());
            docInfoRepository.save(doc);
        });
    }

    /** Sets the chunk count and updated-at on the document, if it exists. */
    private void updateChunkCount(Long docId, int count) {
        docInfoRepository.findById(docId).ifPresent(doc -> {
            doc.setChunkCount(count);
            doc.setUpdatedAt(LocalDateTime.now());
            docInfoRepository.save(doc);
        });
    }

    /**
     * Best-effort removal of a document's stored file: a failure is logged and does not
     * stop the database deletion. A missing path is ignored.
     */
    private void deleteStoredFile(DocInfo docInfo) {
        String storedPath = docInfo.getFilePath();
        if (storedPath == null || storedPath.isBlank()) {
            return;
        }
        try {
            Files.deleteIfExists(Paths.get(storedPath));
        } catch (IOException e) {
            log.warn("파일 삭제 실패: {}", storedPath, e);
        }
    }

    /**
     * Deletes a document: its stored file, then its database row.
     * NOTE(review): chunk rows are expected to be removed by a database-level cascade,
     * which is not visible from this class - confirm against the schema.
     *
     * @throws IllegalArgumentException if the document does not exist
     */
    @Transactional
    public void deleteDocument(Long docId) {
        DocInfo docInfo = docInfoRepository.findById(docId)
                .orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId));

        deleteStoredFile(docInfo);
        docInfoRepository.delete(docInfo);
        log.info("문서 삭제 완료: docId={}", docId);
    }

    /** Deletes every document of a topic: stored files first, then the database rows. */
    @Transactional
    public void deleteAllByTopic(Long topicId) {
        List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
        log.info("전체 문서 삭제 시작: topicId={}, count={}", topicId, documents.size());

        for (DocInfo docInfo : documents) {
            deleteStoredFile(docInfo);
        }
        docInfoRepository.deleteAll(documents);
        log.info("전체 문서 삭제 완료: topicId={}, count={}", topicId, documents.size());
    }
}

View File

@@ -0,0 +1,120 @@
package kr.co.ragone.service;
import lombok.extern.slf4j.Slf4j;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.io.InputStream;
/**
 * Extracts plain text from uploaded documents with Apache Tika.
 * PDFs go through a configured parser (inline images, automatic OCR strategy); every
 * other type uses the Tika facade.
 */
@Slf4j
@Service
public class DocumentParserService {

    private final Tika tika = createTika();

    /**
     * Creates the Tika facade with the string-length limit disabled. By default
     * {@code parseToString} truncates its result, which would silently drop the end of
     * large documents while the PDF path below is already unlimited.
     */
    private static Tika createTika() {
        Tika facade = new Tika();
        facade.setMaxStringLength(-1);
        return facade;
    }

    /**
     * Extracts text from an uploaded file (PDF, DOCX, TXT, ...).
     *
     * @param file the uploaded file
     * @return the cleaned text; may be empty
     * @throws IOException   if the upload cannot be read
     * @throws TikaException if Tika cannot parse the document
     */
    public String parseDocument(MultipartFile file) throws IOException, TikaException {
        String filename = file.getOriginalFilename();
        log.info("문서 파싱 시작: {}", filename);

        String content;
        if (filename != null && filename.toLowerCase().endsWith(".pdf")) {
            // PDFs get dedicated parser options
            content = parsePdfWithOptions(file);
        } else {
            try (InputStream inputStream = file.getInputStream()) {
                content = tika.parseToString(inputStream);
            }
        }

        content = cleanText(content);
        log.info("문서 파싱 완료: {} chars", content.length());

        // Very short output is worth a warning
        if (content.length() < 100) {
            log.warn("⚠️ 추출된 텍스트가 매우 짧습니다. PDF가 이미지 기반일 수 있습니다.");
            log.warn("원본 파일: {}, 추출 길이: {} chars", filename, content.length());
        }
        return content;
    }

    /**
     * Parses a PDF with explicit options and falls back to the default Tika parsing when
     * that fails. The fallback opens a fresh stream from the upload, because the stream
     * handed to the failed parser has already been (partly) consumed.
     */
    private String parsePdfWithOptions(MultipartFile file) throws IOException, TikaException {
        try (InputStream inputStream = file.getInputStream()) {
            PDFParserConfig pdfConfig = new PDFParserConfig();
            pdfConfig.setExtractInlineImages(true);
            pdfConfig.setExtractUniqueInlineImagesOnly(true);
            pdfConfig.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.AUTO); // try OCR automatically

            ParseContext parseContext = new ParseContext();
            parseContext.set(PDFParserConfig.class, pdfConfig);

            Parser parser = new AutoDetectParser();
            parseContext.set(Parser.class, parser);

            Metadata metadata = new Metadata();
            BodyContentHandler handler = new BodyContentHandler(-1); // no write limit

            parser.parse(inputStream, handler, metadata, parseContext);

            if (log.isDebugEnabled()) {
                log.debug("PDF 메타데이터:");
                for (String name : metadata.names()) {
                    log.debug(" {}: {}", name, metadata.get(name));
                }
            }
            return handler.toString();
        } catch (Exception e) {
            // Deliberately broad: any parser failure triggers the best-effort fallback
            log.error("PDF 파싱 실패, 기본 파싱으로 재시도", e);
            try (InputStream retryStream = file.getInputStream()) {
                return tika.parseToString(retryStream);
            }
        }
    }

    /**
     * Collapses runs of spaces/tabs to one space, limits consecutive newlines to two and
     * trims the result; null becomes "".
     */
    private String cleanText(String text) {
        if (text == null) {
            return "";
        }
        return text
                .replaceAll("[ \\t]+", " ")
                .replaceAll("\\n{3,}", "\n\n")
                .trim();
    }

    /**
     * Extracts text from a file on disk.
     *
     * @param file the file to parse
     * @return the cleaned text; may be empty
     * @throws IOException   if the file cannot be read
     * @throws TikaException if Tika cannot parse the document
     */
    public String parseDocument(java.io.File file) throws IOException, TikaException {
        log.info("문서 파싱 시작: {}", file.getName());
        String content = cleanText(tika.parseToString(file));
        log.info("문서 파싱 완료: {} chars", content.length());
        return content;
    }
}

View File

@@ -0,0 +1,55 @@
package kr.co.ragone.service;
import com.theokanning.openai.embedding.EmbeddingRequest;
import com.theokanning.openai.embedding.EmbeddingResult;
import com.theokanning.openai.service.OpenAiService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Creates text embeddings through the OpenAI embeddings API and formats them as
 * PostgreSQL {@code vector} literals.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class EmbeddingService {

    private final OpenAiService openAiService;

    @Value("${openai.model.embedding}")
    private String embeddingModel;

    /**
     * Converts text into an embedding vector.
     *
     * @param text the text to embed; must not be null or blank
     * @return the embedding of the single input
     * @throws IllegalArgumentException if {@code text} is null or blank
     * @throws IllegalStateException    if the API response contains no embedding
     */
    public List<Double> createEmbedding(String text) {
        if (text == null || text.isBlank()) {
            throw new IllegalArgumentException("text to embed must not be null or blank");
        }
        EmbeddingRequest request = EmbeddingRequest.builder()
                .model(embeddingModel)
                .input(Collections.singletonList(text))
                .build();

        EmbeddingResult result = openAiService.createEmbeddings(request);
        if (result.getData() == null || result.getData().isEmpty()) {
            throw new IllegalStateException("Embedding response contained no data");
        }
        return result.getData().get(0).getEmbedding();
    }

    /**
     * Formats a vector as a PostgreSQL {@code vector} literal, e.g. {@code [0.1,0.2]}.
     *
     * @param embedding the vector components
     * @return comma-separated components wrapped in square brackets
     */
    public String toVectorString(List<Double> embedding) {
        return embedding.stream()
                .map(String::valueOf)
                .collect(Collectors.joining(",", "[", "]"));
    }

    /**
     * Embeds text and returns the result directly as a PostgreSQL {@code vector} literal.
     *
     * @param text the text to embed; must not be null or blank
     * @return the embedding formatted by {@link #toVectorString(List)}
     */
    public String createEmbeddingAsString(String text) {
        return toVectorString(createEmbedding(text));
    }
}

View File

@@ -0,0 +1,54 @@
server:
  port: 8080

spring:
  application:
    name: ragone
  profiles:
    active: local
  datasource:
    # Connection settings come from the environment so credentials are not committed.
    # DB_PASSWORD has no default on purpose: set it in the environment or in the
    # git-ignored application-local.yml. The password that was previously committed
    # here should be rotated.
    url: ${DB_URL:jdbc:postgresql://172.25.0.79:5432/turbosoft_rag_db}
    username: ${DB_USERNAME:turbosoft}
    password: ${DB_PASSWORD}
    driver-class-name: org.postgresql.Driver
  jpa:
    hibernate:
      ddl-auto: validate
    show-sql: true
    properties:
      hibernate:
        format_sql: true
        dialect: org.hibernate.dialect.PostgreSQLDialect
  servlet:
    multipart:
      max-file-size: 50MB
      max-request-size: 50MB

# OpenAI settings
openai:
  api-key: ${OPENAI_API_KEY:your-api-key-here}
  model:
    embedding: text-embedding-3-small
    chat: gpt-4o-mini

# RAG settings
rag:
  chunk:
    size: 1000
    overlap: 100
  retrieval:
    top-k: 10
    similarity-threshold: 0.3 # lowered to widen the search range

# File storage path
file:
  upload-dir: ./uploads

logging:
  level:
    kr.co.ragone: DEBUG
    org.hibernate.SQL: DEBUG