This commit is contained in:
2025-12-11 02:09:57 +09:00
parent 6c80670b47
commit 8749de6aef
34 changed files with 2115 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
package kr.co.ragone;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point for the RAG One application.
 */
@SpringBootApplication
public class RagoneApplication {

    // Boots the Spring application context; all configuration lives in
    // kr.co.ragone.config and application properties.
    public static void main(String[] args) {
        SpringApplication.run(RagoneApplication.class, args);
    }
}

View File

@@ -0,0 +1,10 @@
package kr.co.ragone.config;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
/**
 * Enables Spring's {@code @Async} method execution so document indexing can
 * run off the request thread (see DocumentIndexingService).
 */
@Configuration
@EnableAsync
public class AsyncConfig {
    // Enables asynchronous processing; marker configuration only.
    // NOTE(review): no custom TaskExecutor is defined, so the Spring default
    // executor is used — confirm that is acceptable for indexing load.
}

View File

@@ -0,0 +1,30 @@
package kr.co.ragone.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
import org.springframework.web.filter.CorsFilter;
import java.util.Arrays;
/**
 * Global CORS configuration allowing the local front-end dev servers to call
 * the API with credentials.
 */
@Configuration
public class CorsConfig {

    /**
     * Registers a filter that accepts credentialed requests from the
     * localhost:3000 front-end, with any header and the standard HTTP verbs,
     * on every path.
     */
    @Bean
    public CorsFilter corsFilter() {
        CorsConfiguration corsRules = new CorsConfiguration();
        corsRules.setAllowCredentials(true);
        corsRules.setAllowedHeaders(Arrays.asList("*"));
        corsRules.setAllowedMethods(Arrays.asList("GET", "POST", "PUT", "DELETE", "OPTIONS"));
        corsRules.setAllowedOrigins(Arrays.asList(
                "http://localhost:3000",
                "http://127.0.0.1:3000"
        ));

        UrlBasedCorsConfigurationSource pathMapping = new UrlBasedCorsConfigurationSource();
        pathMapping.registerCorsConfiguration("/**", corsRules);
        return new CorsFilter(pathMapping);
    }
}

View File

@@ -0,0 +1,20 @@
package kr.co.ragone.config;
import com.theokanning.openai.service.OpenAiService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.time.Duration;
/**
 * Builds the OpenAI client bean used for embeddings and chat completions.
 */
@Configuration
public class OpenAiConfig {

    // API key injected from application properties (openai.api-key).
    @Value("${openai.api-key}")
    private String apiKey;

    /**
     * OpenAiService with a 60-second call timeout (chat completions on long
     * prompts can exceed the library default).
     */
    @Bean
    public OpenAiService openAiService() {
        return new OpenAiService(apiKey, Duration.ofSeconds(60));
    }
}

View File

@@ -0,0 +1,34 @@
package kr.co.ragone.controller;
import kr.co.ragone.service.ChatService;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoint for RAG chat. All logic lives in {@link ChatService}; this
 * controller only maps the request payload onto the service call.
 */
@RestController
@RequestMapping("/api/chat")
@RequiredArgsConstructor
public class ChatController {

    private final ChatService chatService;

    /**
     * Answers a question against the selected topics. A null or blank
     * sessionKey makes the service start a new conversation.
     */
    @PostMapping
    public ResponseEntity<ChatService.RagResponse> chat(@RequestBody ChatRequest request) {
        return ResponseEntity.ok(
                chatService.ask(
                        request.getQuestion(),
                        request.getTopicIds(),
                        request.getSessionKey()));
    }

    /** Request payload for POST /api/chat. */
    @Data
    public static class ChatRequest {
        private String question;
        private List<Long> topicIds;
        private String sessionKey; // session key (a new one is created when absent)
    }
}

View File

@@ -0,0 +1,150 @@
package kr.co.ragone.controller;
import kr.co.ragone.domain.DocInfo;
import kr.co.ragone.repository.DocInfoRepository;
import kr.co.ragone.service.DocumentIndexingService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
/**
 * REST API for uploading, listing, downloading and deleting documents.
 * Indexing work is delegated to {@link DocumentIndexingService}.
 */
@Slf4j
@RestController
@RequestMapping("/api")
@RequiredArgsConstructor
public class DocumentController {

    private final DocumentIndexingService documentIndexingService;
    private final DocInfoRepository docInfoRepository;

    /**
     * Uploads a document into a topic and starts indexing.
     * Returns the created DocInfo (PROCESSING state) or 400 on failure.
     */
    @PostMapping("/topics/{topicId}/documents/upload")
    public ResponseEntity<DocInfo> uploadDocument(
            @PathVariable Long topicId,
            @RequestParam("file") MultipartFile file) {
        log.info("문서 업로드 요청: topicId={}, fileName={}", topicId, file.getOriginalFilename());
        try {
            DocInfo docInfo = documentIndexingService.uploadAndIndex(topicId, file);
            return ResponseEntity.ok(docInfo);
        } catch (Exception e) {
            log.error("문서 업로드 실패", e);
            return ResponseEntity.badRequest().build();
        }
    }

    /** Lists the documents registered under a topic. */
    @GetMapping("/topics/{topicId}/documents")
    public ResponseEntity<List<DocInfo>> getDocuments(@PathVariable Long topicId) {
        List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
        return ResponseEntity.ok(documents);
    }

    /** Returns one document's metadata, or 404 if the id is unknown. */
    @GetMapping("/documents/{docId}")
    public ResponseEntity<DocInfo> getDocument(@PathVariable Long docId) {
        return docInfoRepository.findById(docId)
                .map(ResponseEntity::ok)
                .orElse(ResponseEntity.notFound().build());
    }

    /**
     * Streams the stored file back to the client.
     * BUGFIX: an unknown docId now yields 404 Not Found; previously it was
     * reported as 400 Bad Request because the IllegalArgumentException fell
     * into the generic catch block.
     */
    @GetMapping("/documents/{docId}/download")
    public ResponseEntity<Resource> downloadDocument(@PathVariable Long docId) {
        java.util.Optional<DocInfo> found = docInfoRepository.findById(docId);
        if (found.isEmpty()) {
            log.warn("문서를 찾을 수 없습니다: {}", docId);
            return ResponseEntity.notFound().build();
        }
        DocInfo docInfo = found.get();
        try {
            Path filePath = Paths.get(docInfo.getFilePath());
            Resource resource = new UrlResource(filePath.toUri());
            if (!resource.exists()) {
                log.error("파일이 존재하지 않습니다: {}", docInfo.getFilePath());
                return ResponseEntity.notFound().build();
            }
            // Percent-encode the file name so non-ASCII (Korean) names survive;
            // '+' is rewritten to %20 because Content-Disposition is not
            // form-encoded (URLEncoder targets application/x-www-form-urlencoded).
            String encodedFileName = URLEncoder.encode(docInfo.getOriginalName(), StandardCharsets.UTF_8)
                    .replaceAll("\\+", "%20");
            // Content-Type derived from the stored file extension.
            String contentType = getContentType(docInfo.getFileType());
            return ResponseEntity.ok()
                    .contentType(MediaType.parseMediaType(contentType))
                    .header(HttpHeaders.CONTENT_DISPOSITION,
                            "attachment; filename=\"" + encodedFileName + "\"; filename*=UTF-8''" + encodedFileName)
                    .body(resource);
        } catch (Exception e) {
            log.error("문서 다운로드 실패", e);
            return ResponseEntity.badRequest().build();
        }
    }

    /** Deletes one document (file, chunks and metadata). */
    @DeleteMapping("/documents/{docId}")
    public ResponseEntity<Void> deleteDocument(@PathVariable Long docId) {
        try {
            documentIndexingService.deleteDocument(docId);
            return ResponseEntity.ok().build();
        } catch (Exception e) {
            log.error("문서 삭제 실패", e);
            return ResponseEntity.badRequest().build();
        }
    }

    /** Deletes every document belonging to a topic. */
    @DeleteMapping("/topics/{topicId}/documents")
    public ResponseEntity<Void> deleteAllDocuments(@PathVariable Long topicId) {
        try {
            documentIndexingService.deleteAllByTopic(topicId);
            return ResponseEntity.ok().build();
        } catch (Exception e) {
            log.error("전체 문서 삭제 실패", e);
            return ResponseEntity.badRequest().build();
        }
    }

    /** Maps a file extension to the Content-Type used for download. */
    private String getContentType(String fileType) {
        if (fileType == null) {
            return "application/octet-stream";
        }
        return switch (fileType.toLowerCase()) {
            case "pdf" -> "application/pdf";
            case "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "doc" -> "application/msword";
            case "txt" -> "text/plain; charset=UTF-8";
            case "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            case "xls" -> "application/vnd.ms-excel";
            case "hwp" -> "application/x-hwp";
            default -> "application/octet-stream";
        };
    }
}

View File

@@ -0,0 +1,57 @@
package kr.co.ragone.controller;
import kr.co.ragone.domain.TopicInfo;
import kr.co.ragone.repository.TopicInfoRepository;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * CRUD REST API for topics.
 */
@RestController
@RequestMapping("/api/topics")
@RequiredArgsConstructor
public class TopicController {

    private final TopicInfoRepository topicInfoRepository;

    /** Lists active topics only. */
    @GetMapping
    public ResponseEntity<List<TopicInfo>> getTopics() {
        List<TopicInfo> topics = topicInfoRepository.findByIsActiveTrue();
        return ResponseEntity.ok(topics);
    }

    /** Returns one topic, or 404 if the id is unknown. */
    @GetMapping("/{topicId}")
    public ResponseEntity<TopicInfo> getTopic(@PathVariable Long topicId) {
        return topicInfoRepository.findById(topicId)
                .map(ResponseEntity::ok)
                .orElse(ResponseEntity.notFound().build());
    }

    /** Creates a topic from the request body. */
    @PostMapping
    public ResponseEntity<TopicInfo> createTopic(@RequestBody TopicInfo topicInfo) {
        TopicInfo saved = topicInfoRepository.save(topicInfo);
        return ResponseEntity.ok(saved);
    }

    /**
     * Updates name, description, icon and active flag of an existing topic
     * (topicCode is intentionally immutable here).
     * BUGFIX: a request that omits isActive no longer overwrites the stored
     * flag with null.
     */
    @PutMapping("/{topicId}")
    public ResponseEntity<TopicInfo> updateTopic(
            @PathVariable Long topicId,
            @RequestBody TopicInfo topicInfo) {
        return topicInfoRepository.findById(topicId)
                .map(existing -> {
                    existing.setTopicName(topicInfo.getTopicName());
                    existing.setTopicDesc(topicInfo.getTopicDesc());
                    existing.setTopicIcon(topicInfo.getTopicIcon());
                    if (topicInfo.getIsActive() != null) {
                        existing.setIsActive(topicInfo.getIsActive());
                    }
                    return ResponseEntity.ok(topicInfoRepository.save(existing));
                })
                .orElse(ResponseEntity.notFound().build());
    }

    /**
     * Deletes a topic.
     * BUGFIX: deleting an unknown id now returns 404 instead of appearing to
     * succeed (or failing with a 500, depending on the Spring Data version's
     * deleteById behavior).
     */
    @DeleteMapping("/{topicId}")
    public ResponseEntity<Void> deleteTopic(@PathVariable Long topicId) {
        if (!topicInfoRepository.existsById(topicId)) {
            return ResponseEntity.notFound().build();
        }
        topicInfoRepository.deleteById(topicId);
        return ResponseEntity.ok().build();
    }
}

View File

@@ -0,0 +1,48 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
/**
 * JPA entity for one chat message (table TB_CHAT_MESSAGE) — either the user's
 * question or the assistant's answer within a {@link ChatSession}.
 */
@Entity
@Table(name = "TB_CHAT_MESSAGE")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ChatMessage {

    // Surrogate primary key.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "msg_id")
    private Long msgId;

    // Owning conversation; lazy so listing messages does not load the session.
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "session_id")
    private ChatSession chatSession;

    // "user" or "assistant" (values written by ChatService.saveMessage).
    @Column(name = "msg_role", nullable = false, length = 20)
    private String msgRole;

    // Message text.
    @Column(name = "msg_content", nullable = false, columnDefinition = "TEXT")
    private String msgContent;

    // Topic ids the question was scoped to (PostgreSQL BIGINT[] column).
    @Column(name = "topic_ids", columnDefinition = "BIGINT[]")
    private Long[] topicIds;

    // References to the retrieved chunks, serialized as jsonb.
    @JdbcTypeCode(SqlTypes.JSON)
    @Column(name = "source_refs", columnDefinition = "jsonb")
    private List<Map<String, Object>> sourceRefs;

    // NOTE(review): never populated by the code visible here — confirm whether
    // token accounting is still planned or the column can be dropped.
    @Column(name = "token_count")
    private Integer tokenCount;

    // Creation timestamp set in Java (not a database default).
    @Column(name = "created_at")
    @Builder.Default
    private LocalDateTime createdAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,37 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDateTime;
/**
 * JPA entity for a conversation session (table TB_CHAT_SESSION). A session
 * groups chat messages and is looked up by its externally visible sessionKey.
 */
@Entity
@Table(name = "TB_CHAT_SESSION")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ChatSession {

    // Surrogate primary key.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "session_id")
    private Long sessionId;

    // Client-facing unique key; ChatService generates a UUID when none is supplied.
    @Column(name = "session_key", nullable = false, unique = true, length = 100)
    private String sessionKey;

    // Display title; ChatService derives it from the first question (truncated).
    @Column(name = "session_title", length = 255)
    private String sessionTitle;

    // NOTE(review): never set by the code visible here — presumably reserved
    // for future authentication; confirm.
    @Column(name = "user_id", length = 100)
    private String userId;

    // Creation timestamp set in Java (not a database default).
    @Column(name = "created_at")
    @Builder.Default
    private LocalDateTime createdAt = LocalDateTime.now();

    // Last-activity timestamp; refreshed manually by ChatService.saveMessage
    // (there is no @PreUpdate hook).
    @Column(name = "updated_at")
    @Builder.Default
    private LocalDateTime updatedAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,52 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.time.LocalDateTime;
import java.util.Map;
/**
 * JPA entity for one indexed text chunk (table TB_DOC_CHUNK). Rows are
 * inserted via native SQL in DocumentIndexingService so the pgvector
 * embedding column can be populated with a cast.
 */
@Entity
@Table(name = "TB_DOC_CHUNK")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DocChunk {

    // Surrogate primary key.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "chunk_id")
    private Long chunkId;

    // Source document.
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "doc_id")
    private DocInfo docInfo;

    // Denormalized topic reference for topic-scoped similarity search.
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "topic_id")
    private TopicInfo topicInfo;

    // Chunk text as produced by ChunkingService.
    @Column(name = "chunk_content", nullable = false, columnDefinition = "TEXT")
    private String chunkContent;

    // pgvector is handled with native queries; the field is declared as a
    // String because Hibernate does not map vector(1536) directly.
    @Column(name = "chunk_embedding", columnDefinition = "vector(1536)")
    private String chunkEmbedding;

    // 0-based position of the chunk within its document.
    @Column(name = "chunk_index")
    private Integer chunkIndex;

    // Estimated token count (see ChunkingService.estimateTokenCount).
    @Column(name = "token_count")
    private Integer tokenCount;

    // Optional chunk metadata serialized as jsonb.
    @JdbcTypeCode(SqlTypes.JSON)
    @Column(name = "chunk_metadata", columnDefinition = "jsonb")
    private Map<String, Object> chunkMetadata;

    // Creation timestamp set in Java (not a database default).
    @Column(name = "created_at")
    @Builder.Default
    private LocalDateTime createdAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,60 @@
package kr.co.ragone.domain;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDateTime;
/**
 * JPA entity describing an uploaded document (table TB_DOC_INFO) and its
 * indexing status.
 */
@Entity
@Table(name = "TB_DOC_INFO")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DocInfo {

    // Surrogate primary key.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "doc_id")
    private Long docId;

    // Owning topic; Jackson ignores the lazy-proxy fields when serializing.
    @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "topic_id")
    @JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
    private TopicInfo topicInfo;

    // Generated name the file is stored under on disk.
    @Column(name = "file_name", nullable = false, length = 255)
    private String fileName;

    // Original client-supplied file name (used for downloads).
    @Column(name = "original_name", length = 255)
    private String originalName;

    // Absolute/relative path to the stored file.
    @Column(name = "file_path", length = 500)
    private String filePath;

    // Size in bytes.
    @Column(name = "file_size")
    private Long fileSize;

    // Lower-level extension ("pdf", "docx", ...) used to pick a Content-Type.
    @Column(name = "file_type", length = 50)
    private String fileType;

    // Number of chunks produced by indexing.
    @Column(name = "chunk_count")
    @Builder.Default
    private Integer chunkCount = 0;

    // Indexing status: PENDING -> PROCESSING -> INDEXED, or FAILED
    // (values written by DocumentIndexingService).
    @Column(name = "doc_status", length = 20)
    @Builder.Default
    private String docStatus = "PENDING";

    // Failure detail when docStatus is FAILED.
    @Column(name = "error_msg", columnDefinition = "TEXT")
    private String errorMsg;

    // Creation timestamp set in Java (not a database default).
    @Column(name = "created_at")
    @Builder.Default
    private LocalDateTime createdAt = LocalDateTime.now();

    // NOTE(review): only initialized at creation; no @PreUpdate refreshes it
    // on later changes — confirm whether that is intended.
    @Column(name = "updated_at")
    @Builder.Default
    private LocalDateTime updatedAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,44 @@
package kr.co.ragone.domain;
import jakarta.persistence.*;
import lombok.*;
import java.time.LocalDateTime;
/**
 * JPA entity for a document topic/category (table TB_TOPIC_INFO).
 */
@Entity
@Table(name = "TB_TOPIC_INFO")
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class TopicInfo {

    // Surrogate primary key.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "topic_id")
    private Long topicId;

    // Stable unique business code for the topic.
    @Column(name = "topic_code", nullable = false, unique = true, length = 50)
    private String topicCode;

    // Display name.
    @Column(name = "topic_name", nullable = false, length = 100)
    private String topicName;

    // Free-form description.
    @Column(name = "topic_desc", columnDefinition = "TEXT")
    private String topicDesc;

    // Icon identifier used by the front-end.
    @Column(name = "topic_icon", length = 50)
    private String topicIcon;

    // Soft-visibility flag; listing endpoints only return active topics.
    @Column(name = "is_active")
    @Builder.Default
    private Boolean isActive = true;

    // Creation timestamp set in Java (not a database default).
    @Column(name = "created_at")
    @Builder.Default
    private LocalDateTime createdAt = LocalDateTime.now();

    // NOTE(review): only initialized at creation; no @PreUpdate refreshes it.
    @Column(name = "updated_at")
    @Builder.Default
    private LocalDateTime updatedAt = LocalDateTime.now();
}

View File

@@ -0,0 +1,13 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.ChatMessage;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data repository for {@link ChatMessage} rows.
 */
@Repository
public interface ChatMessageRepository extends JpaRepository<ChatMessage, Long> {

    /** All messages of one session in chronological (created_at) order. */
    List<ChatMessage> findByChatSession_SessionIdOrderByCreatedAtAsc(Long sessionId);
}

View File

@@ -0,0 +1,13 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.ChatSession;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.Optional;
/**
 * Spring Data repository for {@link ChatSession} rows.
 */
@Repository
public interface ChatSessionRepository extends JpaRepository<ChatSession, Long> {

    /** Looks a session up by its client-facing unique key. */
    Optional<ChatSession> findBySessionKey(String sessionKey);
}

View File

@@ -0,0 +1,55 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.DocChunk;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data repository for {@link DocChunk} rows, including native pgvector
 * similarity queries (the {@code <=>} operator is pgvector's cosine distance;
 * similarity is computed as 1 - distance).
 */
@Repository
public interface DocChunkRepository extends JpaRepository<DocChunk, Long> {

    /** Chunks belonging to one document. */
    List<DocChunk> findByDocInfo_DocId(Long docId);

    // NOTE(review): a derived delete needs a transactional context — confirm
    // callers run inside @Transactional.
    void deleteByDocInfo_DocId(Long docId);

    /**
     * Vector similarity search across all topics.
     * Returns raw rows: [0]=chunk_id, [1]=doc_id, [2]=topic_id,
     * [3]=chunk_content, [4]=chunk_index, [5]=token_count,
     * [6]=chunk_metadata, [7]=created_at, [8]=similarity.
     */
    @Query(value = """
    SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content,
    c.chunk_index, c.token_count, c.chunk_metadata, c.created_at,
    1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity
    FROM TB_DOC_CHUNK c
    WHERE 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold
    ORDER BY c.chunk_embedding <=> cast(:embedding as vector)
    LIMIT :limit
    """, nativeQuery = true)
    List<Object[]> findSimilarChunks(
            @Param("embedding") String embedding,
            @Param("threshold") double threshold,
            @Param("limit") int limit
    );

    /**
     * Vector similarity search restricted to the given topics.
     * Same row layout as {@link #findSimilarChunks}.
     * NOTE(review): binding a Long[] into ANY(cast(... as BIGINT[])) depends
     * on the JDBC driver's array support — confirm against the target driver.
     */
    @Query(value = """
    SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content,
    c.chunk_index, c.token_count, c.chunk_metadata, c.created_at,
    1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity
    FROM TB_DOC_CHUNK c
    WHERE c.topic_id = ANY(cast(:topicIds as BIGINT[]))
    AND 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold
    ORDER BY c.chunk_embedding <=> cast(:embedding as vector)
    LIMIT :limit
    """, nativeQuery = true)
    List<Object[]> findSimilarChunksByTopics(
            @Param("embedding") String embedding,
            @Param("topicIds") Long[] topicIds,
            @Param("threshold") double threshold,
            @Param("limit") int limit
    );
}

View File

@@ -0,0 +1,15 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.DocInfo;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data repository for {@link DocInfo} rows.
 */
@Repository
public interface DocInfoRepository extends JpaRepository<DocInfo, Long> {

    /** Documents registered under one topic. */
    List<DocInfo> findByTopicInfo_TopicId(Long topicId);

    /** Documents in a given indexing state (e.g. "PENDING", "FAILED"). */
    List<DocInfo> findByDocStatus(String docStatus);
}

View File

@@ -0,0 +1,16 @@
package kr.co.ragone.repository;
import kr.co.ragone.domain.TopicInfo;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
/**
 * Spring Data repository for {@link TopicInfo} rows.
 */
@Repository
public interface TopicInfoRepository extends JpaRepository<TopicInfo, Long> {

    /** Looks a topic up by its unique business code. */
    Optional<TopicInfo> findByTopicCode(String topicCode);

    /** Active topics only (used by the listing endpoint). */
    List<TopicInfo> findByIsActiveTrue();
}

View File

@@ -0,0 +1,275 @@
package kr.co.ragone.service;
import com.theokanning.openai.completion.chat.ChatCompletionRequest;
import com.theokanning.openai.completion.chat.ChatCompletionResult;
import com.theokanning.openai.completion.chat.ChatMessage;
import com.theokanning.openai.service.OpenAiService;
import kr.co.ragone.domain.ChatSession;
import kr.co.ragone.repository.ChatMessageRepository;
import kr.co.ragone.repository.ChatSessionRepository;
import kr.co.ragone.repository.DocChunkRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;
/**
 * RAG pipeline: embeds the question, retrieves similar chunks from pgvector,
 * builds a grounded prompt, calls the chat model, and persists the whole
 * exchange in the chat session.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ChatService {

    private final OpenAiService openAiService;
    private final EmbeddingService embeddingService;
    private final DocChunkRepository docChunkRepository;
    private final ChatSessionRepository chatSessionRepository;
    private final ChatMessageRepository chatMessageRepository;

    // Chat completion model name, from application properties.
    @Value("${openai.model.chat}")
    private String chatModel;

    // Maximum number of chunks retrieved per question.
    @Value("${rag.retrieval.top-k}")
    private int topK;

    // Minimum similarity (1 - cosine distance) for a chunk to be used.
    @Value("${rag.retrieval.similarity-threshold}")
    private double similarityThreshold;

    /**
     * Answers a question with RAG and stores both the question and the answer
     * in the session identified by {@code sessionKey} (a new session is
     * created when the key is null/blank or unknown).
     *
     * @param question   user question
     * @param topicIds   topics to search; null/empty searches all topics
     * @param sessionKey existing session key, or null to start a new session
     */
    @Transactional
    public RagResponse ask(String question, List<Long> topicIds, String sessionKey) {
        // 1. Find or create the conversation session.
        ChatSession session = getOrCreateSession(sessionKey, question);
        // 2. Persist the user's question.
        saveMessage(session, "user", question, topicIds, null);
        // 3. Embed the question.
        String questionEmbedding = embeddingService.createEmbeddingAsString(question);
        log.info("[RAG] Question: {}", question);
        log.info("[RAG] TopicIds: {}", topicIds);
        log.info("[RAG] Threshold: {}, TopK: {}", similarityThreshold, topK);
        // 4. Retrieve similar chunks (all topics, or just the requested ones).
        List<Object[]> chunks;
        if (topicIds == null || topicIds.isEmpty()) {
            log.info("[RAG] Searching ALL topics");
            chunks = docChunkRepository.findSimilarChunks(
                    questionEmbedding, similarityThreshold, topK);
        } else {
            log.info("[RAG] Searching specific topics: {}", topicIds);
            chunks = docChunkRepository.findSimilarChunksByTopics(
                    questionEmbedding, topicIds.toArray(new Long[0]),
                    similarityThreshold, topK);
        }
        log.info("[RAG] Found {} relevant chunks", chunks.size());
        // Log each retrieved chunk (first 100 chars) for tuning.
        for (int i = 0; i < chunks.size(); i++) {
            Object[] row = chunks.get(i);
            String content = (String) row[3];
            Double similarity = ((Number) row[8]).doubleValue();
            log.info("[RAG] Chunk {}: similarity={}, content={}",
                    i + 1, String.format("%.3f", similarity),
                    content.substring(0, Math.min(100, content.length())));
        }
        // 5. Build the prompt context.
        String context = buildContext(chunks);
        // 6. Ask the model.
        String answer = generateAnswer(question, context, chunks.isEmpty());
        // 7. Persist the assistant's answer together with its sources.
        List<SourceInfo> sources = extractSources(chunks);
        saveMessage(session, "assistant", answer, topicIds, sources);
        // 8. Respond.
        return RagResponse.builder()
                .sessionKey(session.getSessionKey())
                .answer(answer)
                .sources(sources)
                .build();
    }

    /**
     * Resolves an existing session by key, or creates one (with the given key,
     * or a fresh UUID when no key was supplied).
     */
    private ChatSession getOrCreateSession(String sessionKey, String firstQuestion) {
        if (sessionKey != null && !sessionKey.isBlank()) {
            return chatSessionRepository.findBySessionKey(sessionKey)
                    .orElseGet(() -> createSession(sessionKey, firstQuestion));
        }
        return createSession(UUID.randomUUID().toString(), firstQuestion);
    }

    /**
     * Creates a new session titled with the (truncated) first question.
     */
    private ChatSession createSession(String sessionKey, String title) {
        String sessionTitle = title.length() > 50 ? title.substring(0, 50) + "..." : title;
        ChatSession session = ChatSession.builder()
                .sessionKey(sessionKey)
                .sessionTitle(sessionTitle)
                .build();
        return chatSessionRepository.save(session);
    }

    /**
     * Persists one chat message and bumps the session's updatedAt.
     * BUGFIX: the {@code topicIds} and {@code sources} arguments were
     * previously accepted but silently dropped even though TB_CHAT_MESSAGE has
     * topic_ids and source_refs columns for them; both are now stored.
     */
    private void saveMessage(ChatSession session, String role, String content,
                             List<Long> topicIds, List<SourceInfo> sources) {
        // Serialize the retrieved-source summaries into the jsonb-friendly
        // List<Map<...>> shape the entity declares.
        List<java.util.Map<String, Object>> sourceRefs = null;
        if (sources != null && !sources.isEmpty()) {
            sourceRefs = sources.stream()
                    .map(s -> {
                        java.util.Map<String, Object> ref = new java.util.LinkedHashMap<String, Object>();
                        ref.put("chunkId", s.getChunkId());
                        ref.put("docId", s.getDocId());
                        ref.put("content", s.getContent());
                        ref.put("similarity", s.getSimilarity());
                        return ref;
                    })
                    .collect(Collectors.toList());
        }
        kr.co.ragone.domain.ChatMessage message = kr.co.ragone.domain.ChatMessage.builder()
                .chatSession(session)
                .msgRole(role)
                .msgContent(content)
                .topicIds(topicIds == null || topicIds.isEmpty() ? null : topicIds.toArray(new Long[0]))
                .sourceRefs(sourceRefs)
                .build();
        chatMessageRepository.save(message);
        // Touch the session so conversation lists can sort by recent activity.
        session.setUpdatedAt(LocalDateTime.now());
        chatSessionRepository.save(session);
    }

    /** Formats the retrieved chunks into the prompt's context block. */
    private String buildContext(List<Object[]> chunks) {
        if (chunks.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        sb.append("=== 검색된 문서 내용 ===\n\n");
        for (int i = 0; i < chunks.size(); i++) {
            Object[] row = chunks.get(i);
            String content = (String) row[3]; // chunk_content
            Double similarity = ((Number) row[8]).doubleValue(); // similarity
            sb.append(String.format("【문서 %d】 (관련도: %.0f%%)\n", i + 1, similarity * 100));
            // NOTE(review): "".repeat(40) appends nothing — a divider like
            // "-".repeat(40) was probably intended. Kept as-is to avoid
            // silently changing the prompt; confirm and fix deliberately.
            sb.append("".repeat(40)).append("\n");
            sb.append(content.trim());
            sb.append("\n\n");
        }
        return sb.toString();
    }

    /**
     * Calls the chat model with either the grounded prompt (context present)
     * or the "no relevant documents" fallback prompt.
     */
    private String generateAnswer(String question, String context, boolean noContext) {
        String systemPrompt;
        if (noContext) {
            systemPrompt = """
            당신은 친절한 문서 기반 질의응답 어시스턴트입니다.
            현재 검색된 관련 문서가 없습니다.
            사용자에게 다음을 안내해주세요:
            1. 해당 질문과 관련된 문서가 시스템에 등록되어 있지 않을 수 있습니다.
            2. 더 구체적인 키워드로 질문하면 도움이 될 수 있습니다.
            3. 관리자에게 관련 문서 등록을 요청할 수 있습니다.
            단, 일반적인 상식이나 공개된 정보로 답변 가능한 경우 도움을 드릴 수 있습니다.
            """;
        } else {
            systemPrompt = """
            당신은 전문적인 데이터 분석 및 문서 기반 질의응답 어시스턴트입니다.
            【역할】
            - 제공된 문서 내용을 깊이 있게 분석하여 답변합니다.
            - 데이터를 요약, 비교, 분석하여 인사이트를 제공합니다.
            - 사용자가 이해하기 쉽게 구조화된 답변을 합니다.
            【답변 규칙】
            1. 문서에 있는 정보를 최대한 활용하여 상세히 답변하세요.
            2. 숫자, 날짜, 이름 등 구체적인 정보가 있으면 반드시 포함하세요.
            3. 여러 문서의 정보를 종합하여 분석적인 답변을 제공하세요.
            4. 표나 목록 형태로 정리하면 좋은 내용은 구조화하세요.
            5. 문서에서 직접 확인되지 않는 내용은 추측하지 마세요.
            6. 답변 마지막에 참고한 문서 번호를 명시하세요.
            【분석 관점】
            - 진행 상황, 진척률, 일정 관련 질문: 구체적인 수치와 기간을 제시
            - 비교 질문: 표 형태로 비교 정리
            - 요약 질문: 핵심 포인트를 불릿으로 정리
            - 추세/변화 질문: 시간순 또는 단계별로 설명
            """;
        }
        String userPrompt;
        if (noContext) {
            userPrompt = String.format("""
            [질문]
            %s
            관련 문서를 찾을 수 없었습니다.
            위 안내에 따라 사용자에게 도움이 되는 응답을 해주세요.
            """, question);
        } else {
            userPrompt = String.format("""
            %s
            [질문]
            %s
            위 문서 내용을 분석하여 질문에 상세히 답변해주세요.
            """, context, question);
        }
        List<ChatMessage> messages = new ArrayList<>();
        messages.add(new ChatMessage("system", systemPrompt));
        messages.add(new ChatMessage("user", userPrompt));
        ChatCompletionRequest request = ChatCompletionRequest.builder()
                .model(chatModel)
                .messages(messages)
                .temperature(0.3) // low temperature for grounded, factual answers
                .maxTokens(2000)  // allow longer answers
                .build();
        ChatCompletionResult result = openAiService.createChatCompletion(request);
        return result.getChoices().get(0).getMessage().getContent();
    }

    /** Maps the raw native-query rows into client-facing source summaries. */
    private List<SourceInfo> extractSources(List<Object[]> chunks) {
        return chunks.stream()
                .map(row -> SourceInfo.builder()
                        .chunkId(((Number) row[0]).longValue())
                        .docId(((Number) row[1]).longValue())
                        .content(truncate((String) row[3], 150))
                        .similarity(((Number) row[8]).doubleValue())
                        .build())
                .collect(Collectors.toList());
    }

    /** Shortens text to maxLength characters, appending "..." when cut. */
    private String truncate(String text, int maxLength) {
        if (text.length() <= maxLength) {
            return text;
        }
        return text.substring(0, maxLength) + "...";
    }

    /** API response: answer text plus the chunks it was grounded on. */
    @lombok.Data
    @lombok.Builder
    public static class RagResponse {
        private String sessionKey;
        private String answer;
        private List<SourceInfo> sources;
    }

    /** One retrieved chunk reference returned to the client. */
    @lombok.Data
    @lombok.Builder
    public static class SourceInfo {
        private Long chunkId;
        private Long docId;
        private String content;
        private Double similarity;
    }
}

View File

@@ -0,0 +1,202 @@
package kr.co.ragone.service;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Splits parsed document text into overlapping chunks for embedding.
 * Chunk size and overlap (in characters) come from application properties
 * (rag.chunk.size / rag.chunk.overlap).
 */
@Slf4j
@Service
public class ChunkingService {

    // Target chunk size in characters.
    @Value("${rag.chunk.size:1000}")
    private int chunkSize;

    // Characters carried over from the end of one chunk into the next.
    @Value("${rag.chunk.overlap:100}")
    private int chunkOverlap;

    // Minimum chunk size (smaller remainders are merged into the previous chunk
    // or kept as a single chunk).
    private static final int MIN_CHUNK_SIZE = 50;

    /**
     * Splits text into chunks: normalizes whitespace, splits into sentences,
     * then packs sentences into chunks of roughly chunkSize characters with
     * chunkOverlap characters of carry-over between consecutive chunks.
     *
     * NOTE(review): a single sentence longer than chunkSize produces one
     * oversized chunk unless the whole text degenerated to one "sentence"
     * (only then does the word-based fallback kick in) — confirm acceptable.
     */
    public List<ChunkResult> chunkText(String text) {
        List<ChunkResult> chunks = new ArrayList<>();
        if (text == null || text.isBlank()) {
            log.warn("빈 텍스트가 입력되었습니다.");
            return chunks;
        }
        // Normalize whitespace before measuring length.
        text = normalizeText(text);
        log.info("청킹 시작: 원본 {}자, 청크 크기 {}, 오버랩 {}",
        text.length(), chunkSize, chunkOverlap);
        // Short text becomes a single chunk.
        if (text.length() <= chunkSize) {
            chunks.add(createChunk(text, 0));
            log.info("텍스트가 짧아 단일 청크로 생성: {}자", text.length());
            return chunks;
        }
        // Split into sentences, then pack sentences into chunks.
        List<String> sentences = splitIntoSentences(text);
        log.debug("문장 {} 개로 분할됨", sentences.size());
        StringBuilder currentChunk = new StringBuilder();
        int chunkIndex = 0;
        for (String sentence : sentences) {
            // Flush the current chunk when adding the sentence would exceed
            // the target size (and the chunk is big enough to stand alone).
            if (currentChunk.length() + sentence.length() > chunkSize && currentChunk.length() >= MIN_CHUNK_SIZE) {
                chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex++));
                // Seed the next chunk with the tail of this one (overlap).
                String overlap = getOverlapText(currentChunk.toString());
                currentChunk = new StringBuilder(overlap);
            }
            if (currentChunk.length() > 0 && !currentChunk.toString().endsWith(" ")) {
                currentChunk.append(" ");
            }
            currentChunk.append(sentence.trim());
        }
        // Flush the final chunk.
        if (currentChunk.length() >= MIN_CHUNK_SIZE) {
            chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex));
        } else if (currentChunk.length() > 0 && !chunks.isEmpty()) {
            // Too short to stand alone: merge into the previous chunk.
            ChunkResult lastChunk = chunks.get(chunks.size() - 1);
            String merged = lastChunk.getContent() + " " + currentChunk.toString().trim();
            chunks.set(chunks.size() - 1, createChunk(merged, lastChunk.getIndex()));
        } else if (currentChunk.length() > 0) {
            chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex));
        }
        log.info("청킹 완료: {} 청크 생성", chunks.size());
        return chunks;
    }

    /**
     * Collapses runs of spaces/tabs and runs of blank lines.
     */
    private String normalizeText(String text) {
        return text
        // collapse consecutive spaces/tabs
        .replaceAll("[ \\t]+", " ")
        // collapse 3+ newlines down to a blank line
        .replaceAll("\\n{2,}", "\n\n")
        .trim();
    }

    /**
     * Splits text into sentences on Korean/English terminators, with a
     * word-count fallback when no sentence boundaries are found.
     */
    private List<String> splitIntoSentences(String text) {
        List<String> sentences = new ArrayList<>();
        // Sentence boundary: . ! ? or 。 followed by whitespace, or a newline.
        Pattern sentencePattern = Pattern.compile("(?<=[.!?。])\\s+|(?<=\\n)");
        String[] parts = sentencePattern.split(text);
        for (String part : parts) {
            String trimmed = part.trim();
            if (!trimmed.isEmpty()) {
                sentences.add(trimmed);
            }
        }
        // Fallback: if sentence splitting failed (one giant "sentence"),
        // split by word count instead.
        if (sentences.size() <= 1 && text.length() > chunkSize) {
            sentences = splitByWords(text, chunkSize / 2);
        }
        return sentences;
    }

    /**
     * Splits by word count (fallback when sentence splitting fails).
     */
    private List<String> splitByWords(String text, int wordsPerChunk) {
        List<String> chunks = new ArrayList<>();
        String[] words = text.split("\\s+");
        StringBuilder current = new StringBuilder();
        int wordCount = 0;
        for (String word : words) {
            if (wordCount >= wordsPerChunk && current.length() > 0) {
                chunks.add(current.toString().trim());
                current = new StringBuilder();
                wordCount = 0;
            }
            if (current.length() > 0) {
                current.append(" ");
            }
            current.append(word);
            wordCount++;
        }
        if (current.length() > 0) {
            chunks.add(current.toString().trim());
        }
        return chunks;
    }

    // Wraps chunk text with its position and estimated token count.
    private ChunkResult createChunk(String content, int index) {
        return ChunkResult.builder()
        .content(content)
        .index(index)
        .tokenCount(estimateTokenCount(content))
        .build();
    }

    // Returns the trailing chunkOverlap characters, trimmed forward to the
    // next word boundary so the carry-over never starts mid-word.
    private String getOverlapText(String text) {
        if (text.length() <= chunkOverlap) {
            return text;
        }
        String overlap = text.substring(text.length() - chunkOverlap);
        int spaceIndex = overlap.indexOf(' ');
        if (spaceIndex > 0) {
            overlap = overlap.substring(spaceIndex + 1);
        }
        return overlap;
    }

    // Rough token estimate: Hangul ~1 token per 1.5 chars, everything else
    // ~1 token per 4 chars (heuristic, not a real tokenizer).
    private int estimateTokenCount(String text) {
        int koreanChars = 0;
        int otherChars = 0;
        for (char c : text.toCharArray()) {
            if (Character.UnicodeScript.of(c) == Character.UnicodeScript.HANGUL) {
                koreanChars++;
            } else {
                otherChars++;
            }
        }
        return (int) (koreanChars / 1.5 + otherChars / 4);
    }

    /** One chunk: its text, 0-based index, and estimated token count. */
    @lombok.Data
    @lombok.Builder
    public static class ChunkResult {
        private String content;
        private int index;
        private int tokenCount;
    }
}

View File

@@ -0,0 +1,235 @@
package kr.co.ragone.service;
import kr.co.ragone.domain.DocChunk;
import kr.co.ragone.domain.DocInfo;
import kr.co.ragone.domain.TopicInfo;
import kr.co.ragone.repository.DocChunkRepository;
import kr.co.ragone.repository.DocInfoRepository;
import kr.co.ragone.repository.TopicInfoRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.List;
import java.util.UUID;
@Slf4j
@Service
@RequiredArgsConstructor
public class DocumentIndexingService {
private final TopicInfoRepository topicInfoRepository;
private final DocInfoRepository docInfoRepository;
private final DocChunkRepository docChunkRepository;
private final DocumentParserService documentParserService;
private final ChunkingService chunkingService;
private final EmbeddingService embeddingService;
private final JdbcTemplate jdbcTemplate;
@Value("${file.upload-dir:./uploads}")
private String uploadDir;
    /**
     * Stores the uploaded file, records a DocInfo row in PROCESSING state and
     * kicks off chunking/embedding.
     *
     * NOTE(review): processIndexingAsync is invoked on {@code this}, which
     * bypasses the Spring @Async proxy — indexing therefore actually runs
     * synchronously on the request thread. Confirm, and consider moving the
     * async method to a separate bean.
     *
     * @throws IllegalArgumentException if the topic does not exist
     */
    @Transactional
    public DocInfo uploadAndIndex(Long topicId, MultipartFile file) throws Exception {
        // 1. Resolve the target topic.
        TopicInfo topicInfo = topicInfoRepository.findById(topicId)
        .orElseThrow(() -> new IllegalArgumentException("주제를 찾을 수 없습니다: " + topicId));
        // 2. Persist the file under uploadDir with a generated name.
        String savedFileName = saveFile(file);
        String filePath = Paths.get(uploadDir, savedFileName).toString();
        // 3. Record the document in PROCESSING state.
        DocInfo docInfo = DocInfo.builder()
        .topicInfo(topicInfo)
        .fileName(savedFileName)
        .originalName(file.getOriginalFilename())
        .filePath(filePath)
        .fileSize(file.getSize())
        .fileType(getFileExtension(file.getOriginalFilename()))
        .docStatus("PROCESSING")
        .build();
        docInfo = docInfoRepository.save(docInfo);
        // 4. Index (intended asynchronously; see NOTE above). Also note that
        //    handing a MultipartFile to a truly async worker is fragile: its
        //    temp file may be deleted when the request completes — TODO confirm.
        processIndexingAsync(docInfo.getDocId(), topicInfo, file);
        return docInfo;
    }
    /**
     * Async wrapper around the indexing pipeline: records FAILED status (with
     * the error message) instead of propagating exceptions.
     * NOTE(review): only asynchronous when called through the Spring proxy;
     * the in-class call from uploadAndIndex bypasses it — confirm.
     */
    @Async
    public void processIndexingAsync(Long docId, TopicInfo topicInfo, MultipartFile file) {
        try {
            processIndexing(docId, topicInfo, file);
        } catch (Exception e) {
            log.error("인덱싱 실패: docId={}", docId, e);
            updateDocStatus(docId, "FAILED", e.getMessage());
        }
    }
/**
* 실제 인덱싱 처리
*/
private void processIndexing(Long docId, TopicInfo topicInfo, MultipartFile file) throws Exception {
log.info("인덱싱 시작: docId={}, fileName={}", docId, file.getOriginalFilename());
// 1. 문서 파싱
String content = documentParserService.parseDocument(file);
if (content == null || content.isBlank()) {
throw new RuntimeException("문서 내용이 비어있습니다.");
}
// 2. 청킹
List<ChunkingService.ChunkResult> chunks = chunkingService.chunkText(content);
if (chunks.isEmpty()) {
throw new RuntimeException("청크 생성 실패");
}
log.info("청크 생성 완료: {} chunks", chunks.size());
// 3. 각 청크에 대해 임베딩 생성 및 저장
DocInfo docInfo = docInfoRepository.findById(docId)
.orElseThrow(() -> new RuntimeException("문서를 찾을 수 없습니다."));
for (ChunkingService.ChunkResult chunk : chunks) {
// 임베딩 생성
String embeddingVector = embeddingService.createEmbeddingAsString(chunk.getContent());
// Native Query로 벡터 저장
saveChunkWithEmbedding(docInfo, topicInfo, chunk, embeddingVector);
log.debug("청크 저장 완료: index={}", chunk.getIndex());
}
// 4. 문서 상태 업데이트
updateDocStatus(docId, "INDEXED", null);
updateChunkCount(docId, chunks.size());
log.info("인덱싱 완료: docId={}, chunks={}", docId, chunks.size());
}
/**
* 청크 + 벡터 저장 (Native Query 사용)
*/
private void saveChunkWithEmbedding(DocInfo docInfo, TopicInfo topicInfo,
ChunkingService.ChunkResult chunk, String embedding) {
String sql = """
INSERT INTO TB_DOC_CHUNK
(doc_id, topic_id, chunk_content, chunk_embedding, chunk_index, token_count, created_at)
VALUES (?, ?, ?, ?::vector, ?, ?, ?)
""";
jdbcTemplate.update(sql,
docInfo.getDocId(),
topicInfo.getTopicId(),
chunk.getContent(),
embedding,
chunk.getIndex(),
chunk.getTokenCount(),
LocalDateTime.now()
);
}
/**
* 파일 저장
*/
private String saveFile(MultipartFile file) throws IOException {
Path uploadPath = Paths.get(uploadDir);
if (!Files.exists(uploadPath)) {
Files.createDirectories(uploadPath);
}
String originalFilename = file.getOriginalFilename();
String extension = getFileExtension(originalFilename);
String savedFileName = UUID.randomUUID().toString() + "." + extension;
Path filePath = uploadPath.resolve(savedFileName);
Files.copy(file.getInputStream(), filePath);
log.info("파일 저장: {}", filePath);
return savedFileName;
}
private String getFileExtension(String filename) {
if (filename == null) return "";
int lastDot = filename.lastIndexOf('.');
return lastDot > 0 ? filename.substring(lastDot + 1).toLowerCase() : "";
}
private void updateDocStatus(Long docId, String status, String errorMsg) {
docInfoRepository.findById(docId).ifPresent(doc -> {
doc.setDocStatus(status);
doc.setErrorMsg(errorMsg);
doc.setUpdatedAt(LocalDateTime.now());
docInfoRepository.save(doc);
});
}
private void updateChunkCount(Long docId, int count) {
docInfoRepository.findById(docId).ifPresent(doc -> {
doc.setChunkCount(count);
doc.setUpdatedAt(LocalDateTime.now());
docInfoRepository.save(doc);
});
}
/**
* 문서 삭제 (청크 포함)
*/
@Transactional
public void deleteDocument(Long docId) {
DocInfo docInfo = docInfoRepository.findById(docId)
.orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId));
// 파일 삭제
try {
Path filePath = Paths.get(docInfo.getFilePath());
Files.deleteIfExists(filePath);
} catch (IOException e) {
log.warn("파일 삭제 실패: {}", docInfo.getFilePath(), e);
}
// DB 삭제 (CASCADE로 청크도 함께 삭제됨)
docInfoRepository.delete(docInfo);
log.info("문서 삭제 완료: docId={}", docId);
}
/**
* 주제별 전체 문서 삭제
*/
@Transactional
public void deleteAllByTopic(Long topicId) {
List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
log.info("전체 문서 삭제 시작: topicId={}, count={}", topicId, documents.size());
for (DocInfo docInfo : documents) {
// 파일 삭제
try {
Path filePath = Paths.get(docInfo.getFilePath());
Files.deleteIfExists(filePath);
} catch (IOException e) {
log.warn("파일 삭제 실패: {}", docInfo.getFilePath(), e);
}
}
// DB 삭제 (CASCADE로 청크도 함께 삭제됨)
docInfoRepository.deleteAll(documents);
log.info("전체 문서 삭제 완료: topicId={}, count={}", topicId, documents.size());
}
}

View File

@@ -0,0 +1,120 @@
package kr.co.ragone.service;
import lombok.extern.slf4j.Slf4j;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.io.InputStream;
/**
 * Extracts plain text from uploaded documents using Apache Tika.
 * PDFs get a dedicated parse path with inline-image extraction and automatic
 * OCR; everything else goes through Tika's facade auto-detection.
 */
@Slf4j
@Service
public class DocumentParserService {

    private final Tika tika = new Tika();

    /**
     * Extracts text from an uploaded file (PDF, DOCX, TXT, etc.).
     *
     * @param file the uploaded document
     * @return cleaned plain text (may be short for image-only PDFs — a warning is logged)
     * @throws IOException   on read failure
     * @throws TikaException on parse failure
     */
    public String parseDocument(MultipartFile file) throws IOException, TikaException {
        String filename = file.getOriginalFilename();
        log.info("문서 파싱 시작: {}", filename);
        String content;
        if (filename != null && filename.toLowerCase().endsWith(".pdf")) {
            // Buffer the upload once so both the OCR-enabled parse and the
            // plain-Tika fallback get a fresh stream — an InputStream cannot
            // be re-read after a failed parse attempt consumed it.
            content = parsePdfWithOptions(file.getBytes());
        } else {
            try (InputStream inputStream = file.getInputStream()) {
                content = tika.parseToString(inputStream);
            }
        }
        // Normalize whitespace
        content = cleanText(content);
        log.info("문서 파싱 완료: {} chars", content.length());
        // Very little text usually means an image-based (scanned) PDF.
        if (content.length() < 100) {
            log.warn("⚠️ 추출된 텍스트가 매우 짧습니다. PDF가 이미지 기반일 수 있습니다.");
            log.warn("원본 파일: {}, 추출 길이: {} chars", filename, content.length());
        }
        return content;
    }

    /**
     * Parses a PDF with inline-image extraction and OCR set to AUTO.
     * Falls back to Tika's default parse — from a fresh stream over the same
     * buffered bytes — if the configured parse fails.
     *
     * @param data the complete PDF bytes
     */
    private String parsePdfWithOptions(byte[] data) throws IOException, TikaException {
        try (InputStream inputStream = new java.io.ByteArrayInputStream(data)) {
            // PDF parser configuration
            PDFParserConfig pdfConfig = new PDFParserConfig();
            pdfConfig.setExtractInlineImages(true);
            pdfConfig.setExtractUniqueInlineImagesOnly(true);
            pdfConfig.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.AUTO); // try OCR automatically
            ParseContext parseContext = new ParseContext();
            parseContext.set(PDFParserConfig.class, pdfConfig);
            // Parser setup
            Parser parser = new AutoDetectParser();
            parseContext.set(Parser.class, parser);
            // Metadata and content handler (-1 = no content-length limit)
            Metadata metadata = new Metadata();
            BodyContentHandler handler = new BodyContentHandler(-1);
            parser.parse(inputStream, handler, metadata, parseContext);
            // Metadata logging
            log.debug("PDF 메타데이터:");
            for (String name : metadata.names()) {
                log.debug("  {}: {}", name, metadata.get(name));
            }
            return handler.toString();
        } catch (Exception e) {
            log.error("PDF 파싱 실패, 기본 파싱으로 재시도", e);
            // BUGFIX: the previous code retried on the already-consumed stream,
            // which returned empty text; re-parse from the buffered bytes instead.
            return tika.parseToString(new java.io.ByteArrayInputStream(data));
        }
    }

    /**
     * Normalizes extracted text: collapses runs of spaces/tabs, caps blank
     * lines at one, and strips leading/trailing whitespace.
     */
    private String cleanText(String text) {
        if (text == null) return "";
        return text
                // collapse consecutive spaces/tabs into one space
                .replaceAll("[ \\t]+", " ")
                // limit consecutive newlines to two (one blank line)
                .replaceAll("\\n{3,}", "\n\n")
                // strip surrounding whitespace
                .trim();
    }

    /**
     * Extracts text from a file on disk (same cleaning as the upload path).
     */
    public String parseDocument(java.io.File file) throws IOException, TikaException {
        log.info("문서 파싱 시작: {}", file.getName());
        String content = tika.parseToString(file);
        content = cleanText(content);
        log.info("문서 파싱 완료: {} chars", content.length());
        return content;
    }
}

View File

@@ -0,0 +1,55 @@
package kr.co.ragone.service;
import com.theokanning.openai.embedding.EmbeddingRequest;
import com.theokanning.openai.embedding.EmbeddingResult;
import com.theokanning.openai.service.OpenAiService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Thin wrapper around the OpenAI embeddings API that also knows how to
 * serialize a vector into PostgreSQL pgvector literal form.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class EmbeddingService {

    private final OpenAiService openAiService;

    @Value("${openai.model.embedding}")
    private String embeddingModel;

    /**
     * Requests an embedding vector for the given text from the configured model.
     *
     * @param text the text to embed
     * @return the embedding vector for the first (and only) input
     */
    public List<Double> createEmbedding(String text) {
        EmbeddingRequest embeddingRequest = EmbeddingRequest.builder()
                .model(embeddingModel)
                .input(Collections.singletonList(text))
                .build();
        EmbeddingResult embeddingResult = openAiService.createEmbeddings(embeddingRequest);
        return embeddingResult.getData().get(0).getEmbedding();
    }

    /**
     * Renders a vector as a pgvector literal, e.g. {@code [0.1,0.2,0.3]}.
     */
    public String toVectorString(List<Double> embedding) {
        String joined = embedding.stream()
                .map(String::valueOf)
                .collect(Collectors.joining(","));
        return "[" + joined + "]";
    }

    /**
     * Embeds the text and returns the result directly in pgvector literal form.
     */
    public String createEmbeddingAsString(String text) {
        return toVectorString(createEmbedding(text));
    }
}

View File

@@ -0,0 +1,54 @@
server:
  port: 8080

spring:
  application:
    name: ragone
  profiles:
    active: local
  datasource:
    # Credentials are read from the environment; the defaults preserve the
    # previous hard-coded values so existing local setups keep working.
    # NOTE(review): a real password was committed here — rotate it and then
    # remove the defaults below.
    url: ${DB_URL:jdbc:postgresql://172.25.0.79:5432/turbosoft_rag_db}
    username: ${DB_USERNAME:turbosoft}
    password: ${DB_PASSWORD:xjqhthvmxm123}
    driver-class-name: org.postgresql.Driver
  jpa:
    hibernate:
      ddl-auto: validate
    show-sql: true
    properties:
      hibernate:
        format_sql: true
        dialect: org.hibernate.dialect.PostgreSQLDialect
  servlet:
    multipart:
      max-file-size: 50MB
      max-request-size: 50MB

# OpenAI settings
openai:
  api-key: ${OPENAI_API_KEY:your-api-key-here}
  model:
    embedding: text-embedding-3-small
    chat: gpt-4o-mini

# RAG settings
rag:
  chunk:
    size: 1000
    overlap: 100
  retrieval:
    top-k: 10
    similarity-threshold: 0.3 # lowered to widen the retrieval net

# File storage path
file:
  upload-dir: ./uploads

logging:
  level:
    kr.co.ragone: DEBUG
    org.hibernate.SQL: DEBUG