import numpy as np
from typing import List, Dict
import logging
from ..models import FeedItem, FeedSignalRecord, FeedItemViewModel
from .text_embedding import TextEmbeddingService
logger = logging.getLogger(__name__)
class FeedRankingService:
    """Service for ranking feed items based on taste profile and signals."""

    def __init__(self, embedding_service: TextEmbeddingService):
        # Provides cosine_similarity() between embedding vectors.
        self.embedding_service = embedding_service

    @staticmethod
    def _normalized_mean(vectors: List[np.ndarray]) -> np.ndarray:
        """Return the L2-normalized mean of *vectors*.

        If the mean has zero norm it is returned unnormalized, mirroring the
        guard the callers need (division by zero would yield NaNs).
        """
        profile = np.mean(vectors, axis=0)
        norm = np.linalg.norm(profile)
        return profile / norm if norm > 0 else profile

    def rank(self, items: List[FeedItem], signals: List[FeedSignalRecord],
             definition, embeddings: Dict[str, np.ndarray]) -> List[FeedItemViewModel]:
        """Rank feed items based on definition and signals.

        Args:
            items: Candidate feed items to score and order.
            signals: User feedback records ("MoreLikeThis", "LessOfThis",
                "LessFromAuthor") used for the taste profile and author affinity.
            definition: Feed definition; only its ``id`` ("balanced",
                "semantic", "recent") is consulted for the scoring strategy.
            embeddings: Map of subject URI -> embedding vector.

        Returns:
            View models sorted by combined score, descending.
        """
        if not items:
            return []

        # Positive "MoreLikeThis" signals seed the taste profile.
        seed_uris = [s.subject_uri for s in signals
                     if s.signal == "MoreLikeThis" and s.weight > 0]
        seed_embeddings = [embeddings[uri] for uri in seed_uris if uri in embeddings]

        # Cold start: with no usable signals, seed from the first 10 items.
        if not seed_embeddings:
            seed_embeddings = [embeddings[item.subject_uri]
                               for item in items[:10]
                               if item.subject_uri in embeddings]

        taste_profile = (self._normalized_mean(seed_embeddings)
                         if seed_embeddings else None)

        # Build author affinity scores from signals. Unknown signal types are
        # ignored, matching the original if/elif chain.
        author_scores: Dict[str, float] = {}
        for signal in signals:
            author_did = signal.metadata.get("author_did", "")
            if not author_did:
                continue
            if signal.signal == "MoreLikeThis":
                delta = signal.weight
            elif signal.signal == "LessFromAuthor":
                delta = -abs(signal.weight)
            elif signal.signal == "LessOfThis":
                # Content dislike is a weaker author penalty than an explicit
                # "LessFromAuthor" signal.
                delta = -abs(signal.weight) * 0.5
            else:
                continue
            author_scores[author_did] = author_scores.get(author_did, 0) + delta

        # Score each item.
        scored_items: List[tuple] = []
        for item in items:
            # Semantic score: cosine similarity to the taste profile,
            # mapped from [-1, 1] into [0, 1]; 0.5 is the neutral default
            # when no profile or embedding is available.
            semantic_score = 0.5
            if taste_profile is not None and item.subject_uri in embeddings:
                item_embedding = embeddings[item.subject_uri]
                semantic_score = self.embedding_service.cosine_similarity(
                    taste_profile, item_embedding)
                semantic_score = (semantic_score + 1) / 2

            # Author affinity, clamped into [0, 1] around a neutral 0.5
            # (raw scores of -5..+5 map linearly onto 0..1).
            author_affinity = author_scores.get(item.author_did, 0)
            author_affinity = max(0, min(1, (author_affinity + 5) / 10))

            # Combined score: base 0.5, semantic weight 0.4, author weight 0.1.
            score = 0.5 + (semantic_score * 0.4) + (author_affinity * 0.1)

            # Strategy-specific adjustment.
            if definition.id == "balanced":
                pass  # Keep the combined score as is.
            elif definition.id == "semantic":
                # Semantic-only ranking, rescaled into [0.2, 1.0].
                score = semantic_score * 0.8 + 0.2
            elif definition.id == "recent":
                pass  # NOTE(review): time decay not implemented yet.

            scored_items.append((score, semantic_score, author_affinity, item))

        # Sort by combined score, descending.
        scored_items.sort(key=lambda entry: entry[0], reverse=True)

        return [
            FeedItemViewModel(
                subject_uri=item.subject_uri,
                author_did=item.author_did,
                author_handle=item.author_handle,
                author_display_name=item.author_display_name,
                text=item.text,
                origin=item.origin,
                labels=item.labels or [],
                created_at=item.created_at,
                score=score,
                semantic_score=semantic_score,
                author_affinity=author_affinity,
                explanation=f"Semantic: {semantic_score:.2f}, Author: {author_affinity:.2f}"
            )
            for score, semantic_score, author_affinity, item in scored_items
        ]

    def get_taste_profile(self, actor_did: str, items: List[FeedItem],
                          signals: List[FeedSignalRecord],
                          embeddings: Dict[str, np.ndarray]) -> dict:
        """Get taste profile information for display.

        Args:
            actor_did: DID of the actor (currently unused in the computation).
            items: Candidate feed items.
            signals: Feedback records; positive "MoreLikeThis" signals
                (capped at 20) seed the profile.
            embeddings: Map of subject URI -> embedding vector.

        Returns:
            Dict with seed/candidate counts, the closest-matching item
            ("ideal_post"), and the top-10 most similar sampled items.
        """
        # Seed influences: up to 20 positively-weighted "MoreLikeThis" URIs.
        seed_uris = [s.subject_uri for s in signals
                     if s.signal == "MoreLikeThis" and s.weight > 0][:20]
        seed_embeddings = [embeddings[uri] for uri in seed_uris if uri in embeddings]
        if not seed_embeddings:
            return {
                "seed_count": 0,
                "candidate_count": len(items),
                "ideal_post": None,
                "taste_keywords": None,
                "top_seed_influences": [],
                "top_contributors": []
            }

        taste_profile = self._normalized_mean(seed_embeddings)

        # Find the ideal post (closest to the taste profile). Start at -inf so
        # even a similarity of exactly -1.0 still selects a match.
        best_match = None
        best_sim = float("-inf")
        for item in items:
            if item.subject_uri in embeddings:
                sim = self.embedding_service.cosine_similarity(
                    taste_profile, embeddings[item.subject_uri])
                if sim > best_sim:
                    best_sim = sim
                    best_match = item

        # Compute top contributors over a bounded sample for performance.
        contributors = []
        for item in items[:100]:
            if item.subject_uri in embeddings:
                sim = self.embedding_service.cosine_similarity(
                    taste_profile, embeddings[item.subject_uri])
                contributors.append((sim, item))
        # Sort on similarity only: a bare tuple sort would fall through to
        # comparing FeedItem objects on ties and raise TypeError.
        contributors.sort(key=lambda pair: pair[0], reverse=True)

        return {
            "seed_count": len(seed_uris),
            "candidate_count": len(items),
            "ideal_post": best_match,
            "taste_keywords": None,  # Could extract from vocab weights.
            "top_seed_influences": [],
            "top_contributors": [item for _, item in contributors[:10]]
        }