package kr.co.ragone.service;

import kr.co.ragone.domain.DocInfo;
import kr.co.ragone.domain.TopicInfo;
import kr.co.ragone.repository.DocChunkRepository;
import kr.co.ragone.repository.DocInfoRepository;
import kr.co.ragone.repository.TopicInfoRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;

import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.List;
import java.util.UUID;

@Slf4j
@Service
@RequiredArgsConstructor
public class DocumentIndexingService {

    private final TopicInfoRepository topicInfoRepository;
    private final DocInfoRepository docInfoRepository;
    private final DocChunkRepository docChunkRepository;
    private final DocumentParserService documentParserService;
    private final ChunkingService chunkingService;
    private final EmbeddingService embeddingService;
    private final SmartChunkingService smartChunkingService;
    private final VisionService visionService;
    private final JdbcTemplate jdbcTemplate;

    @Value("${file.upload-dir:./uploads}")
    private String uploadDir;

    /**
     * Uploads a document and starts indexing.
     */
    @Transactional
    public DocInfo uploadAndIndex(Long topicId, MultipartFile file) throws Exception {
        // 1. Verify the topic exists
        TopicInfo topicInfo = topicInfoRepository.findById(topicId)
                .orElseThrow(() -> new IllegalArgumentException("Topic not found: " + topicId));

        // 2. Save the uploaded file
        String savedFileName = saveFile(file);
        String filePath = Paths.get(uploadDir, savedFileName).toString();

        // 3. Save document info in PROCESSING state
        DocInfo docInfo = DocInfo.builder()
                .topicInfo(topicInfo)
                .fileName(savedFileName)
                .originalName(file.getOriginalFilename())
                .filePath(filePath)
                .fileSize(file.getSize())
                .fileType(getFileExtension(file.getOriginalFilename()))
                .docStatus("PROCESSING")
                .build();
        docInfo = docInfoRepository.save(docInfo);

        // 4. Index asynchronously
        processIndexingAsync(docInfo.getDocId(), topicInfo, file);

        return docInfo;
    }
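    // NOTE: @Async is proxy-based, so this self-invocation from uploadAndIndex() runs on the
    // caller's thread rather than asynchronously. Moving the method to a separate bean (and
    // passing the saved file path instead of the MultipartFile, whose temporary file may be
    // cleaned up once the request completes) would make it truly asynchronous.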
    /**
     * Asynchronous indexing entry point.
     */
    @Async
    public void processIndexingAsync(Long docId, TopicInfo topicInfo, MultipartFile file) {
        try {
            processIndexing(docId, topicInfo, file);
        } catch (Exception e) {
            log.error("Indexing failed: docId={}", docId, e);
            updateDocStatus(docId, "FAILED", e.getMessage());
        }
    }

    /**
     * Performs the actual indexing.
     */
    private void processIndexing(Long docId, TopicInfo topicInfo, MultipartFile file) throws Exception {
        log.info("Indexing started: docId={}, fileName={}", docId, file.getOriginalFilename());

        // Load the document info
        DocInfo docInfo = docInfoRepository.findById(docId)
                .orElseThrow(() -> new RuntimeException("Document not found."));

        String content;

        // 1. Vision processing (PDF with Vision enabled)
        String fileType = getFileExtension(file.getOriginalFilename());
        if ("pdf".equalsIgnoreCase(fileType) && visionService.isEnabled()) {
            log.info("[Vision] Starting PDF Vision analysis...");
            content = visionService.processPdfWithVision(docInfo.getFilePath());
            if (content == null || content.isBlank()) {
                log.warn("[Vision] Vision analysis failed, falling back to the default parser");
                content = documentParserService.parseDocument(file);
            } else {
                log.info("[Vision] Vision analysis finished: {} characters", content.length());
            }
        } else {
            // 2. Default document parsing (Tika)
            content = documentParserService.parseDocument(file);
        }

        if (content == null || content.isBlank()) {
            throw new RuntimeException("Document content is empty.");
        }

        // 3. Chunking
        List<ChunkingService.ChunkResult> chunks = chunkingService.chunkText(content);
        if (chunks.isEmpty()) {
            throw new RuntimeException("Failed to create chunks");
        }
        log.info("Chunking finished: {} chunks", chunks.size());

        // 4. Create and store an embedding for each chunk
        for (int i = 0; i < chunks.size(); i++) {
            ChunkingService.ChunkResult chunk = chunks.get(i);

            // Create the embedding
            String embeddingVector = embeddingService.createEmbeddingAsString(chunk.getContent());

            // Smart chunking: generate metadata (if enabled)
            SmartChunkingService.ChunkMetadata metadata = null;
            if (smartChunkingService.isEnabled()) {
                log.info("[SmartChunking] Generating metadata... ({}/{})", i + 1, chunks.size());
                metadata = smartChunkingService.generateMetadata(chunk.getContent());
            }

            // Store vector + metadata with a native query
            saveChunkWithEmbedding(docInfo, topicInfo, chunk, embeddingVector, metadata);

            log.debug("Chunk saved: index={}", chunk.getIndex());
        }

        // 5. Update document status
        updateDocStatus(docId, "INDEXED", null);
        updateChunkCount(docId, chunks.size());

        log.info("Indexing finished: docId={}, chunks={}", docId, chunks.size());
    }
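    // NOTE: the native INSERT below assumes chunk_embedding is a pgvector column; the embedding
    // is bound as text in pgvector's "[x1, x2, ...]" format and cast with ?::vector.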
    /**
     * Stores a chunk with its vector and metadata (native query).
     */
    private void saveChunkWithEmbedding(DocInfo docInfo, TopicInfo topicInfo,
                                        ChunkingService.ChunkResult chunk,
                                        String embedding,
                                        SmartChunkingService.ChunkMetadata metadata) {
        String sql = """
                INSERT INTO TB_DOC_CHUNK
                    (doc_id, topic_id, chunk_content, chunk_embedding, chunk_index, token_count,
                     chunk_summary, chunk_keywords, chunk_questions, chunk_type, created_at)
                VALUES (?, ?, ?, ?::vector, ?, ?, ?, ?, ?, ?, ?)
                """;

        // Metadata handling
        String summary = null;
        String keywords = null;
        String questions = null;
        if (metadata != null) {
            summary = metadata.getSummary();
            keywords = metadata.getKeywords() != null ? String.join(", ", metadata.getKeywords()) : null;
            questions = metadata.getQuestions() != null ? toJson(metadata.getQuestions()) : null;
        }

        jdbcTemplate.update(sql,
                docInfo.getDocId(),
                topicInfo.getTopicId(),
                chunk.getContent(),
                embedding,
                chunk.getIndex(),
                chunk.getTokenCount(),
                summary,
                keywords,
                questions,
                "text",
                LocalDateTime.now()
        );
    }

    /**
     * Converts a list to a JSON string.
     */
    private String toJson(List<?> list) {
        if (list == null || list.isEmpty()) return null;
        try {
            ObjectMapper mapper = new ObjectMapper();
            return mapper.writeValueAsString(list);
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Saves the uploaded file to disk.
     */
    private String saveFile(MultipartFile file) throws IOException {
        Path uploadPath = Paths.get(uploadDir);
        if (!Files.exists(uploadPath)) {
            Files.createDirectories(uploadPath);
        }

        String originalFilename = file.getOriginalFilename();
        String extension = getFileExtension(originalFilename);
        String savedFileName = UUID.randomUUID().toString() + "." + extension;

        Path filePath = uploadPath.resolve(savedFileName);
        Files.copy(file.getInputStream(), filePath);

        log.info("File saved: {}", filePath);
        return savedFileName;
    }

    private String getFileExtension(String filename) {
        if (filename == null) return "";
        int lastDot = filename.lastIndexOf('.');
        return lastDot > 0 ? filename.substring(lastDot + 1).toLowerCase() : "";
    }

    private void updateDocStatus(Long docId, String status, String errorMsg) {
        docInfoRepository.findById(docId).ifPresent(doc -> {
            doc.setDocStatus(status);
            doc.setErrorMsg(errorMsg);
            doc.setUpdatedAt(LocalDateTime.now());
            docInfoRepository.save(doc);
        });
    }

    private void updateChunkCount(Long docId, int count) {
        docInfoRepository.findById(docId).ifPresent(doc -> {
            doc.setChunkCount(count);
            doc.setUpdatedAt(LocalDateTime.now());
            docInfoRepository.save(doc);
        });
    }

    /**
     * Deletes a document along with its chunks.
     */
    @Transactional
    public void deleteDocument(Long docId) {
        DocInfo docInfo = docInfoRepository.findById(docId)
                .orElseThrow(() -> new IllegalArgumentException("Document not found: " + docId));

        // Delete the file
        try {
            Path filePath = Paths.get(docInfo.getFilePath());
            Files.deleteIfExists(filePath);
        } catch (IOException e) {
            log.warn("Failed to delete file: {}", docInfo.getFilePath(), e);
        }

        // Delete from DB (chunks are removed via CASCADE)
        docInfoRepository.delete(docInfo);
        log.info("Document deleted: docId={}", docId);
    }

    /**
     * Deletes all documents for a topic.
     */
    @Transactional
    public void deleteAllByTopic(Long topicId) {
        List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
        log.info("Deleting all documents: topicId={}, count={}", topicId, documents.size());

        for (DocInfo docInfo : documents) {
            // Delete the file
            try {
                Path filePath = Paths.get(docInfo.getFilePath());
                Files.deleteIfExists(filePath);
            } catch (IOException e) {
                log.warn("Failed to delete file: {}", docInfo.getFilePath(), e);
            }
        }

        // Delete from DB (chunks are removed via CASCADE)
        docInfoRepository.deleteAll(documents);
        log.info("All documents deleted: topicId={}, count={}", topicId, documents.size());
    }
}