diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/model/entity/BookMetadataEntity.java b/booklore-api/src/main/java/com/adityachandel/booklore/model/entity/BookMetadataEntity.java index 8d3eeeaa2..15427e8cf 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/model/entity/BookMetadataEntity.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/model/entity/BookMetadataEntity.java @@ -196,6 +196,12 @@ public class BookMetadataEntity { @Column(name = "reviews_locked") private Boolean reviewsLocked = Boolean.FALSE; + @Column(name = "embedding_vector", columnDefinition = "TEXT") + private String embeddingVector; + + @Column(name = "embedding_updated_at") + private Instant embeddingUpdatedAt; + @OneToOne(fetch = FetchType.LAZY) @MapsId @JoinColumn(name = "book_id") diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookRecommendationScheduler.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookRecommendationScheduler.java index b1e2aecee..d3eb046ba 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookRecommendationScheduler.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookRecommendationScheduler.java @@ -2,6 +2,7 @@ package com.adityachandel.booklore.service.recommender; import com.adityachandel.booklore.model.dto.BookRecommendationLite; import com.adityachandel.booklore.model.entity.BookEntity; +import com.adityachandel.booklore.model.entity.BookMetadataEntity; import com.adityachandel.booklore.service.appsettings.AppSettingService; import com.adityachandel.booklore.service.BookQueryService; import jakarta.transaction.Transactional; @@ -10,8 +11,9 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; -import java.util.List; -import java.util.Set; +import java.time.Instant; +import java.util.*; +import java.util.stream.Collectors; @Slf4j @Component @@ -19,8 +21,8 @@ import java.util.Set; public class BookRecommendationScheduler { private final BookQueryService bookQueryService; - private final BookRecommendationService recommendationService; private final AppSettingService appSettingService; + private final BookVectorService vectorService; private static final int RECOMMENDATION_LIMIT = 25; @@ -36,15 +38,68 @@ public class BookRecommendationScheduler { List allBooks = bookQueryService.getAllFullBookEntities(); + Map embeddings = new HashMap<>(); + List booksToUpdate = new ArrayList<>(); + for (BookEntity book : allBooks) { - try { - Set recommendations = recommendationService.findSimilarBookIds(book.getId(), RECOMMENDATION_LIMIT); - book.setSimilarBooksJson(recommendations); - } catch (Exception e) { - log.error("Error updating similar books for book ID {}: {}", book.getId(), e.getMessage(), e); + double[] embedding = vectorService.generateEmbedding(book); + embeddings.put(book.getId(), embedding); + + if (book.getMetadata() != null) { + String embeddingJson = vectorService.serializeVector(embedding); + if (!Objects.equals(book.getMetadata().getEmbeddingVector(), embeddingJson)) { + book.getMetadata().setEmbeddingVector(embeddingJson); + book.getMetadata().setEmbeddingUpdatedAt(Instant.now()); + } } } - bookQueryService.saveAll(allBooks); + + for (BookEntity targetBook : allBooks) { + try { + double[] targetVector = embeddings.get(targetBook.getId()); + if (targetVector == null) continue; + + String targetSeries = Optional.ofNullable(targetBook.getMetadata()) + .map(BookMetadataEntity::getSeriesName) + .map(String::toLowerCase) + .orElse(null); + + List candidates = allBooks.stream() + .filter(candidate -> !candidate.getId().equals(targetBook.getId())) + .filter(candidate -> { + String candidateSeries = Optional.ofNullable(candidate.getMetadata()) + .map(BookMetadataEntity::getSeriesName) + .map(String::toLowerCase) + .orElse(null); + return targetSeries == null || !targetSeries.equals(candidateSeries); + }) + .map(candidate -> { + double[] candidateVector = embeddings.get(candidate.getId()); + double similarity = vectorService.cosineSimilarity(targetVector, candidateVector); + return new BookVectorService.ScoredBook(candidate.getId(), similarity); + }) + .filter(scored -> scored.getScore() > 0.1) + .collect(Collectors.toList()); + + List topSimilar = vectorService.findTopKSimilar( + targetVector, + candidates, + RECOMMENDATION_LIMIT + ); + + Set recommendations = topSimilar.stream() + .map(scored -> new BookRecommendationLite(scored.getBookId(), scored.getScore())) + .collect(Collectors.toSet()); + + targetBook.setSimilarBooksJson(recommendations); + booksToUpdate.add(targetBook); + + } catch (Exception e) { + log.error("Error updating similar books for book ID {}: {}", targetBook.getId(), e.getMessage(), e); + } + } + + bookQueryService.saveAll(booksToUpdate); long endTime = System.currentTimeMillis(); log.info("Completed scheduled task 'updateAllSimilarBooks' at: {}. Duration: {} ms", endTime, endTime - startTime); diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookVectorService.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookVectorService.java new file mode 100644 index 000000000..ec31b65c5 --- /dev/null +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/recommender/BookVectorService.java @@ -0,0 +1,163 @@ +package com.adityachandel.booklore.service.recommender; + +import com.adityachandel.booklore.model.entity.AuthorEntity; +import com.adityachandel.booklore.model.entity.BookEntity; +import com.adityachandel.booklore.model.entity.BookMetadataEntity; +import com.adityachandel.booklore.model.entity.CategoryEntity; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.util.*; +import java.util.stream.Collectors; + +@Slf4j +@Service +@RequiredArgsConstructor +public class BookVectorService { + + private static final ObjectMapper objectMapper = new ObjectMapper(); + private static final int VECTOR_DIMENSION = 128; + + public double[] generateEmbedding(BookEntity book) { + if (book.getMetadata() == null) { + return new double[VECTOR_DIMENSION]; + } + + BookMetadataEntity metadata = book.getMetadata(); + Map features = new HashMap<>(); + + if (metadata.getTitle() != null) { + addTextFeatures(features, "title", metadata.getTitle(), 3.0); + } + + if (metadata.getAuthors() != null) { + metadata.getAuthors().stream() + .map(AuthorEntity::getName) + .filter(Objects::nonNull) + .forEach(author -> features.put("author_" + author.toLowerCase(), 5.0)); + } + + if (metadata.getCategories() != null) { + metadata.getCategories().stream() + .map(CategoryEntity::getName) + .filter(Objects::nonNull) + .forEach(cat -> features.put("category_" + cat.toLowerCase(), 4.0)); + } + + if (metadata.getSeriesName() != null) { + features.put("series_" + metadata.getSeriesName().toLowerCase(), 6.0); + } + + if (metadata.getPublisher() != null) { + features.put("publisher_" + metadata.getPublisher().toLowerCase(), 2.0); + } + + if (metadata.getDescription() != null) { + addTextFeatures(features, "desc", metadata.getDescription(), 1.0); + } + + return featuresToVector(features); + } + + private void addTextFeatures(Map features, String prefix, String text, double weight) { + String[] words = text.toLowerCase() + .replaceAll("[^a-z0-9\\s]", " ") + .split("\\s+"); + + Arrays.stream(words) + .filter(w -> w.length() > 3) + .limit(20) + .forEach(word -> features.merge(prefix + "_" + word, weight, Double::sum)); + } + + private double[] featuresToVector(Map features) { + double[] vector = new double[VECTOR_DIMENSION]; + + for (Map.Entry entry : features.entrySet()) { + int hash = Math.abs(entry.getKey().hashCode()); + int index = hash % VECTOR_DIMENSION; + vector[index] += entry.getValue(); + } + + double norm = 0.0; + for (double v : vector) { + norm += v * v; + } + norm = Math.sqrt(norm); + + if (norm > 0) { + for (int i = 0; i < vector.length; i++) { + vector[i] /= norm; + } + } + + return vector; + } + + public String serializeVector(double[] vector) { + try { + return objectMapper.writeValueAsString(vector); + } catch (JsonProcessingException e) { + log.error("Error serializing vector", e); + return null; + } + } + + public double[] deserializeVector(String vectorJson) { + if (vectorJson == null || vectorJson.isEmpty()) { + return null; + } + try { + return objectMapper.readValue(vectorJson, double[].class); + } catch (JsonProcessingException e) { + log.error("Error deserializing vector", e); + return null; + } + } + + public double cosineSimilarity(double[] v1, double[] v2) { + if (v1 == null || v2 == null || v1.length != v2.length) { + return 0.0; + } + + double dotProduct = 0.0; + for (int i = 0; i < v1.length; i++) { + dotProduct += v1[i] * v2[i]; + } + + return dotProduct; + } + + public List findTopKSimilar(double[] targetVector, List candidates, int k) { + if (targetVector == null) { + return Collections.emptyList(); + } + + return candidates.stream() + .sorted(Comparator.comparingDouble(ScoredBook::getScore).reversed()) + .limit(k) + .collect(Collectors.toList()); + } + + public static class ScoredBook { + private final Long bookId; + private final double score; + + public ScoredBook(Long bookId, double score) { + this.bookId = bookId; + this.score = score; + } + + public Long getBookId() { + return bookId; + } + + public double getScore() { + return score; + } + } +} + diff --git a/booklore-api/src/main/resources/db/migration/V58__Vector_columns.sql b/booklore-api/src/main/resources/db/migration/V58__Vector_columns.sql new file mode 100644 index 000000000..93437d5a2 --- /dev/null +++ b/booklore-api/src/main/resources/db/migration/V58__Vector_columns.sql @@ -0,0 +1,2 @@ +ALTER TABLE book_metadata ADD COLUMN IF NOT EXISTS embedding_vector TEXT; +ALTER TABLE book_metadata ADD COLUMN IF NOT EXISTS embedding_updated_at DATETIME; \ No newline at end of file