This commit is contained in:
2025-12-11 02:09:57 +09:00
parent 6c80670b47
commit 8749de6aef
34 changed files with 2115 additions and 0 deletions

View File

@@ -0,0 +1,235 @@
package kr.co.ragone.service;
import kr.co.ragone.domain.DocChunk;
import kr.co.ragone.domain.DocInfo;
import kr.co.ragone.domain.TopicInfo;
import kr.co.ragone.repository.DocChunkRepository;
import kr.co.ragone.repository.DocInfoRepository;
import kr.co.ragone.repository.TopicInfoRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.UUID;
/**
 * Stores uploaded documents on disk and indexes them for vector search.
 *
 * <p>Indexing pipeline: parse the upload to text, split the text into chunks, create one
 * embedding per chunk, insert every chunk (with its embedding) into {@code TB_DOC_CHUNK},
 * then record the outcome on the owning {@link DocInfo} row.
 *
 * <p>Document status values written by this class: {@code PROCESSING} on upload,
 * {@code INDEXED} on success and {@code FAILED} (with the exception message) on error.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DocumentIndexingService {

    private static final String STATUS_PROCESSING = "PROCESSING";
    private static final String STATUS_INDEXED = "INDEXED";
    private static final String STATUS_FAILED = "FAILED";

    /** Native insert; the embedding is bound as text and cast to {@code vector} by the database. */
    private static final String INSERT_CHUNK_SQL = """
            INSERT INTO TB_DOC_CHUNK
            (doc_id, topic_id, chunk_content, chunk_embedding, chunk_index, token_count, created_at)
            VALUES (?, ?, ?, ?::vector, ?, ?, ?)
            """;

    private final TopicInfoRepository topicInfoRepository;
    private final DocInfoRepository docInfoRepository;
    private final DocChunkRepository docChunkRepository;
    private final DocumentParserService documentParserService;
    private final ChunkingService chunkingService;
    private final EmbeddingService embeddingService;
    private final JdbcTemplate jdbcTemplate;

    /** Directory that receives uploaded files; defaults to {@code ./uploads}. */
    @Value("${file.upload-dir:./uploads}")
    private String uploadDir;

    /**
     * Uploads a document and starts indexing it.
     *
     * <p>The file is written to the upload directory, a {@link DocInfo} row is saved in
     * {@code PROCESSING} state, and indexing is handed to
     * {@link #processIndexingAsync(Long, TopicInfo, MultipartFile)}.
     *
     * @param topicId id of the topic the document belongs to
     * @param file    the uploaded file
     * @return the saved document metadata
     * @throws IllegalArgumentException if no topic exists for {@code topicId}
     * @throws IOException              if the file cannot be written to the upload directory
     * @throws Exception                declared for backward compatibility with existing callers
     */
    @Transactional
    public DocInfo uploadAndIndex(Long topicId, MultipartFile file) throws Exception {
        // 1. Resolve the topic first so nothing is written for an unknown topic.
        TopicInfo topicInfo = topicInfoRepository.findById(topicId)
                .orElseThrow(() -> new IllegalArgumentException("주제를 찾을 수 없습니다: " + topicId));

        // 2. Store the file on disk.
        String originalName = file.getOriginalFilename();
        String savedFileName = saveFile(file);
        String filePath = Paths.get(uploadDir, savedFileName).toString();

        try {
            // 3. Save the document metadata (PROCESSING state).
            DocInfo docInfo = docInfoRepository.save(DocInfo.builder()
                    .topicInfo(topicInfo)
                    .fileName(savedFileName)
                    .originalName(originalName)
                    .filePath(filePath)
                    .fileSize(file.getSize())
                    .fileType(getFileExtension(originalName))
                    .docStatus(STATUS_PROCESSING)
                    .build());

            // 4. Hand over to indexing.
            // NOTE(review): this call goes through `this`, so under Spring's default proxy-based
            // AOP the @Async advice is presumably not applied and indexing runs on the caller's
            // thread, inside this transaction — confirm. Making it truly asynchronous would also
            // require the upload content to outlive the request and the row above to be committed
            // before the worker looks it up.
            processIndexingAsync(docInfo.getDocId(), topicInfo, file);
            return docInfo;
        } catch (RuntimeException e) {
            // An exception leaving this method means the caller gets no DocInfo back, so the
            // file written in step 2 is removed rather than left behind.
            deleteFileQuietly(filePath);
            throw e;
        }
    }

    /**
     * Runs indexing for an already-saved document and records a failure instead of propagating it.
     *
     * <p>Any exception raised by the pipeline is logged and the document is marked
     * {@code FAILED} with the exception message.
     *
     * @param docId     id of the document being indexed
     * @param topicInfo topic the document belongs to
     * @param file      the uploaded file to parse
     */
    @Async
    public void processIndexingAsync(Long docId, TopicInfo topicInfo, MultipartFile file) {
        try {
            processIndexing(docId, topicInfo, file);
        } catch (Exception e) {
            log.error("인덱싱 실패: docId={}", docId, e);
            updateDocStatus(docId, STATUS_FAILED, e.getMessage());
        }
    }

    /**
     * Performs the actual indexing: parse, chunk, embed, persist, mark as indexed.
     *
     * @throws RuntimeException if the parsed content is blank, no chunks are produced, or the
     *                          document row no longer exists
     * @throws Exception        whatever the parser, chunker or embedding service raises
     */
    private void processIndexing(Long docId, TopicInfo topicInfo, MultipartFile file) throws Exception {
        log.info("인덱싱 시작: docId={}, fileName={}", docId, file.getOriginalFilename());

        // 1. Parse the document into plain text.
        String content = documentParserService.parseDocument(file);
        if (content == null || content.isBlank()) {
            throw new RuntimeException("문서 내용이 비어있습니다.");
        }

        // 2. Split the text into chunks.
        List<ChunkingService.ChunkResult> chunks = chunkingService.chunkText(content);
        if (chunks == null || chunks.isEmpty()) {
            throw new RuntimeException("청크 생성 실패");
        }
        log.info("청크 생성 완료: {} chunks", chunks.size());

        DocInfo docInfo = docInfoRepository.findById(docId)
                .orElseThrow(() -> new RuntimeException("문서를 찾을 수 없습니다."));

        // 3a. Create every embedding before writing anything, so a failure while embedding
        //     does not leave a partial set of chunk rows behind for a FAILED document.
        List<String> embeddings = new ArrayList<>(chunks.size());
        for (ChunkingService.ChunkResult chunk : chunks) {
            embeddings.add(embeddingService.createEmbeddingAsString(chunk.getContent()));
        }

        // 3b. Persist each chunk together with its embedding (native query).
        int position = 0;
        for (ChunkingService.ChunkResult chunk : chunks) {
            saveChunkWithEmbedding(docInfo, topicInfo, chunk, embeddings.get(position++));
            log.debug("청크 저장 완료: index={}", chunk.getIndex());
        }

        // 4. Record success and the chunk count in a single update.
        markIndexed(docId, chunks.size());
        log.info("인덱싱 완료: docId={}, chunks={}", docId, chunks.size());
    }

    /**
     * Inserts one chunk row together with its embedding using a native query.
     *
     * @param embedding textual form of the embedding, cast to {@code vector} in SQL
     */
    private void saveChunkWithEmbedding(DocInfo docInfo, TopicInfo topicInfo,
                                        ChunkingService.ChunkResult chunk, String embedding) {
        jdbcTemplate.update(INSERT_CHUNK_SQL,
                docInfo.getDocId(),
                topicInfo.getTopicId(),
                chunk.getContent(),
                embedding,
                chunk.getIndex(),
                chunk.getTokenCount(),
                LocalDateTime.now()
        );
    }

    /**
     * Writes the upload into the upload directory under a random UUID-based name.
     *
     * @return the generated file name (not the full path)
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    private String saveFile(MultipartFile file) throws IOException {
        Path uploadPath = Paths.get(uploadDir);
        if (!Files.exists(uploadPath)) {
            Files.createDirectories(uploadPath);
        }

        String extension = getFileExtension(file.getOriginalFilename());
        String savedFileName = UUID.randomUUID().toString();
        if (!extension.isEmpty()) {
            // Only append the dot when there is an extension; avoids names ending in ".".
            savedFileName = savedFileName + "." + extension;
        }

        Path filePath = uploadPath.resolve(savedFileName);
        // Close the upload stream once copied; Files.copy does not close it for us.
        try (InputStream in = file.getInputStream()) {
            Files.copy(in, filePath);
        }
        log.info("파일 저장: {}", filePath);
        return savedFileName;
    }

    /**
     * Extracts the lower-cased extension of a client-supplied file name.
     *
     * <p>Returns an empty string when the name is {@code null}, has no dot after its first
     * character, or when the text after the last dot contains anything other than letters and
     * digits. The last rule keeps path separators and similar characters out of the stored
     * file name, since the value comes from the client.
     */
    private String getFileExtension(String filename) {
        if (filename == null) {
            return "";
        }
        int lastDot = filename.lastIndexOf('.');
        if (lastDot <= 0) {
            return "";
        }
        String extension = filename.substring(lastDot + 1);
        for (int i = 0; i < extension.length(); i++) {
            if (!Character.isLetterOrDigit(extension.charAt(i))) {
                return "";
            }
        }
        // Locale.ROOT keeps the result independent of the server's default locale.
        return extension.toLowerCase(Locale.ROOT);
    }

    /** Sets status and error message on the document, if it still exists. */
    private void updateDocStatus(Long docId, String status, String errorMsg) {
        docInfoRepository.findById(docId).ifPresent(doc -> {
            doc.setDocStatus(status);
            doc.setErrorMsg(errorMsg);
            doc.setUpdatedAt(LocalDateTime.now());
            docInfoRepository.save(doc);
        });
    }

    /** Marks the document as indexed, clears the error message and stores the chunk count in one save. */
    private void markIndexed(Long docId, int chunkCount) {
        docInfoRepository.findById(docId).ifPresent(doc -> {
            doc.setDocStatus(STATUS_INDEXED);
            doc.setErrorMsg(null);
            doc.setChunkCount(chunkCount);
            doc.setUpdatedAt(LocalDateTime.now());
            docInfoRepository.save(doc);
        });
    }

    /**
     * Best-effort removal of a stored file: failures are logged, never propagated.
     *
     * @param filePath path of the file to remove; {@code null} or blank is ignored
     */
    private void deleteFileQuietly(String filePath) {
        if (filePath == null || filePath.isBlank()) {
            return;
        }
        try {
            Files.deleteIfExists(Paths.get(filePath));
        } catch (IOException | RuntimeException e) {
            log.warn("파일 삭제 실패: {}", filePath, e);
        }
    }

    /**
     * Deletes a document and its stored file.
     *
     * @param docId id of the document to delete
     * @throws IllegalArgumentException if no document exists for {@code docId}
     */
    @Transactional
    public void deleteDocument(Long docId) {
        DocInfo docInfo = docInfoRepository.findById(docId)
                .orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId));
        String filePath = docInfo.getFilePath();

        // Delete the row first so the file is kept if the repository call itself throws.
        // Chunk rows are expected to go with it via a cascade defined outside this class
        // (per the original author's note) — not verifiable from here.
        docInfoRepository.delete(docInfo);
        deleteFileQuietly(filePath);
        log.info("문서 삭제 완료: docId={}", docId);
    }

    /**
     * Deletes every document of a topic together with the stored files.
     *
     * @param topicId id of the topic whose documents are removed
     */
    @Transactional
    public void deleteAllByTopic(Long topicId) {
        List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
        log.info("전체 문서 삭제 시작: topicId={}, count={}", topicId, documents.size());

        // Delete the rows first so the files are kept if the repository call itself throws.
        // Chunk rows are expected to go with them via a cascade defined outside this class
        // (per the original author's note) — not verifiable from here.
        docInfoRepository.deleteAll(documents);
        for (DocInfo docInfo : documents) {
            deleteFileQuietly(docInfo.getFilePath());
        }
        log.info("전체 문서 삭제 완료: topicId={}, count={}", topicId, documents.size());
    }
}