import numpy as np
from typing import List, Dict
import logging
from ..models import FeedItem, FeedSignalRecord, FeedItemViewModel
from .text_embedding import TextEmbeddingService
logger = logging.getLogger(__name__)
class FeedRankingService:
    """Service for ranking feed items based on taste profile and signals."""

    def __init__(self, embedding_service: TextEmbeddingService):
        # Provides cosine_similarity() between embedding vectors.
        self.embedding_service = embedding_service

    @staticmethod
    def _normalized_mean(vectors: List[np.ndarray]) -> np.ndarray:
        """Return the L2-normalized mean of *vectors*.

        If the mean has zero norm it is returned unnormalized, mirroring the
        guard the callers need (division by zero would yield NaNs).
        """
        profile = np.mean(vectors, axis=0)
        norm = np.linalg.norm(profile)
        return profile / norm if norm > 0 else profile

    def rank(self, items: List[FeedItem], signals: List[FeedSignalRecord],
             definition, embeddings: Dict[str, np.ndarray]) -> List[FeedItemViewModel]:
        """Rank feed items based on definition and signals.

        Args:
            items: Candidate feed items to score and order.
            signals: User feedback records ("MoreLikeThis", "LessOfThis",
                "LessFromAuthor") used for the taste profile and author affinity.
            definition: Feed definition; only its ``id`` ("balanced",
                "semantic", "recent") is consulted for the scoring strategy.
            embeddings: Map of subject URI -> embedding vector.

        Returns:
            View models sorted by combined score, descending.
        """
        if not items:
            return []

        # Positive "MoreLikeThis" signals seed the taste profile.
        seed_uris = [s.subject_uri for s in signals
                     if s.signal == "MoreLikeThis" and s.weight > 0]
        seed_embeddings = [embeddings[uri] for uri in seed_uris if uri in embeddings]

        # Cold start: with no usable signals, seed from the first 10 items.
        if not seed_embeddings:
            seed_embeddings = [embeddings[item.subject_uri]
                               for item in items[:10]
                               if item.subject_uri in embeddings]

        taste_profile = (self._normalized_mean(seed_embeddings)
                         if seed_embeddings else None)

        # Build author affinity scores from signals. Unknown signal types are
        # ignored, matching the original if/elif chain.
        author_scores: Dict[str, float] = {}
        for signal in signals:
            author_did = signal.metadata.get("author_did", "")
            if not author_did:
                continue
            if signal.signal == "MoreLikeThis":
                delta = signal.weight
            elif signal.signal == "LessFromAuthor":
                delta = -abs(signal.weight)
            elif signal.signal == "LessOfThis":
                # Content dislike is a weaker author penalty than an explicit
                # "LessFromAuthor" signal.
                delta = -abs(signal.weight) * 0.5
            else:
                continue
            author_scores[author_did] = author_scores.get(author_did, 0) + delta

        # Score each item.
        scored_items: List[tuple] = []
        for item in items:
            # Semantic score: cosine similarity to the taste profile,
            # mapped from [-1, 1] into [0, 1]; 0.5 is the neutral default
            # when no profile or embedding is available.
            semantic_score = 0.5
            if taste_profile is not None and item.subject_uri in embeddings:
                item_embedding = embeddings[item.subject_uri]
                semantic_score = self.embedding_service.cosine_similarity(
                    taste_profile, item_embedding)
                semantic_score = (semantic_score + 1) / 2

            # Author affinity, clamped into [0, 1] around a neutral 0.5
            # (raw scores of -5..+5 map linearly onto 0..1).
            author_affinity = author_scores.get(item.author_did, 0)
            author_affinity = max(0, min(1, (author_affinity + 5) / 10))

            # Combined score: base 0.5, semantic weight 0.4, author weight 0.1.
            score = 0.5 + (semantic_score * 0.4) + (author_affinity * 0.1)

            # Strategy-specific adjustment.
            if definition.id == "balanced":
                pass  # Keep the combined score as is.
            elif definition.id == "semantic":
                # Semantic-only ranking, rescaled into [0.2, 1.0].
                score = semantic_score * 0.8 + 0.2
            elif definition.id == "recent":
                pass  # NOTE(review): time decay not implemented yet.

            scored_items.append((score, semantic_score, author_affinity, item))

        # Sort by combined score, descending.
        scored_items.sort(key=lambda entry: entry[0], reverse=True)

        return [
            FeedItemViewModel(
                subject_uri=item.subject_uri,
                author_did=item.author_did,
                author_handle=item.author_handle,
                author_display_name=item.author_display_name,
                text=item.text,
                origin=item.origin,
                labels=item.labels or [],
                created_at=item.created_at,
                score=score,
                semantic_score=semantic_score,
                author_affinity=author_affinity,
                explanation=f"Semantic: {semantic_score:.2f}, Author: {author_affinity:.2f}"
            )
            for score, semantic_score, author_affinity, item in scored_items
        ]

    def get_taste_profile(self, actor_did: str, items: List[FeedItem],
                          signals: List[FeedSignalRecord],
                          embeddings: Dict[str, np.ndarray]) -> dict:
        """Get taste profile information for display.

        Args:
            actor_did: DID of the actor (currently unused in the computation).
            items: Candidate feed items.
            signals: Feedback records; positive "MoreLikeThis" signals
                (capped at 20) seed the profile.
            embeddings: Map of subject URI -> embedding vector.

        Returns:
            Dict with seed/candidate counts, the closest-matching item
            ("ideal_post"), and the top-10 most similar sampled items.
        """
        # Seed influences: up to 20 positively-weighted "MoreLikeThis" URIs.
        seed_uris = [s.subject_uri for s in signals
                     if s.signal == "MoreLikeThis" and s.weight > 0][:20]
        seed_embeddings = [embeddings[uri] for uri in seed_uris if uri in embeddings]
        if not seed_embeddings:
            return {
                "seed_count": 0,
                "candidate_count": len(items),
                "ideal_post": None,
                "taste_keywords": None,
                "top_seed_influences": [],
                "top_contributors": []
            }

        taste_profile = self._normalized_mean(seed_embeddings)

        # Find the ideal post (closest to the taste profile). Start at -inf so
        # even a similarity of exactly -1.0 still selects a match.
        best_match = None
        best_sim = float("-inf")
        for item in items:
            if item.subject_uri in embeddings:
                sim = self.embedding_service.cosine_similarity(
                    taste_profile, embeddings[item.subject_uri])
                if sim > best_sim:
                    best_sim = sim
                    best_match = item

        # Compute top contributors over a bounded sample for performance.
        contributors = []
        for item in items[:100]:
            if item.subject_uri in embeddings:
                sim = self.embedding_service.cosine_similarity(
                    taste_profile, embeddings[item.subject_uri])
                contributors.append((sim, item))
        # Sort on similarity only: a bare tuple sort would fall through to
        # comparing FeedItem objects on ties and raise TypeError.
        contributors.sort(key=lambda pair: pair[0], reverse=True)

        return {
            "seed_count": len(seed_uris),
            "candidate_count": len(items),
            "ideal_post": best_match,
            "taste_keywords": None,  # Could extract from vocab weights.
            "top_seed_influences": [],
            "top_contributors": [item for _, item in contributors[:10]]
        }