davidtran999 committed on
Commit
5150cc5
·
1 Parent(s): 519b145

Fix: Copy pure_semantic_search, query_rewriter, redis_cache to backend/hue_portal/core/

Browse files
backend/hue_portal/core/pure_semantic_search.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pure Semantic Search - 100% vector search with multi-query support.
3
+
4
+ This module implements pure semantic search (no BM25) which is the recommended
5
+ approach when using Query Rewrite Strategy + BGE-M3. All top systems have moved
6
+ away from hybrid search (BM25 + Vector) to pure semantic search since Oct 2025.
7
+ """
8
+ import logging
9
+ from typing import List, Tuple, Optional, Dict, Any, Set
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ from django.db.models import QuerySet
12
+
13
+ from .embeddings import (
14
+ get_embedding_model,
15
+ generate_embedding,
16
+ cosine_similarity
17
+ )
18
+ from .embedding_utils import load_embedding
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Minimum vector score threshold
23
+ DEFAULT_MIN_VECTOR_SCORE = 0.1
24
+
25
+
26
def get_vector_scores(
    queryset: QuerySet,
    query: str,
    top_k: int = 20
) -> List[Tuple[Any, float]]:
    """
    Score every object in *queryset* against *query* by cosine similarity.

    Extracted from hybrid_search.py so pure semantic search can reuse it.

    Args:
        queryset: Django QuerySet whose objects carry stored embeddings.
        query: Search query string.
        top_k: Nominal result count; twice as many rows are returned so
            callers can merge results across multiple rewritten queries.

    Returns:
        List of (object, vector_score) tuples sorted by score descending.
        Empty when the query is blank, no embedding model/vector is
        available, or every stored embedding has a mismatched dimension.
    """
    if not query or not query.strip():
        return []

    # Embed the query; bail out quietly if the model stack is unavailable.
    model = get_embedding_model()
    if model is None:
        return []
    query_vec = generate_embedding(query, model=model)
    if query_vec is None:
        return []

    candidates = list(queryset)
    if not candidates:
        return []

    expected_dim = len(query_vec)
    warned_mismatch = False

    ranked: List[Tuple[Any, float]] = []
    for candidate in candidates:
        stored_vec = load_embedding(candidate)
        if stored_vec is None:
            continue
        stored_dim = len(stored_vec)
        if stored_dim != expected_dim:
            # Stored vectors came from a different model — warn once, skip all.
            if not warned_mismatch:
                logger.warning(
                    f"Dimension mismatch: query={expected_dim}, stored={stored_dim}. Skipping vector search."
                )
                warned_mismatch = True
            continue
        score = cosine_similarity(query_vec, stored_vec)
        if score >= DEFAULT_MIN_VECTOR_SCORE:
            ranked.append((candidate, score))

    # Nothing comparable at all → signal "no vector results".
    if warned_mismatch and not ranked:
        return []

    ranked.sort(key=lambda pair: pair[1], reverse=True)
    # Over-fetch so multi-query merging has enough material to work with.
    return ranked[:top_k * 2]
90
+
91
+
92
def calculate_exact_match_boost(obj: Any, query: str, text_fields: List[str]) -> float:
    """
    Compute a boost score for exact keyword matches in title/name fields.

    Ensures literal matches are still prioritized in pure semantic search.

    Args:
        obj: Django model instance (any object with the named attributes).
        query: Search query string.
        text_fields: Field names to inspect; only the first two (usually
            title/name) are checked.

    Returns:
        Boost score in [0.0, 1.0].
    """
    if not query or not text_fields:
        return 0.0

    normalized = query.lower().strip()
    words = normalized.split()

    # Build 2-word (len > 3) and 3-word (len > 5) phrases from the query.
    phrases = []
    for size, min_len in ((2, 3), (3, 5)):
        for start in range(len(words) - size + 1):
            candidate = " ".join(words[start:start + size])
            if len(candidate) > min_len:
                phrases.append(candidate)

    # Individual words longer than 2 characters also count.
    significant = {w for w in words if len(w) > 2}

    total = 0.0
    # Only the primary fields (title/name) contribute to the boost.
    for field in text_fields[:2]:
        if not hasattr(obj, field):
            continue
        value = str(getattr(obj, field, "")).lower()
        if not value:
            continue

        # Phrase hits carry the most weight; an exact field match adds more.
        for phrase in phrases:
            if phrase in value:
                total += 0.5
                if value.strip() == phrase.strip():
                    total += 0.3

        # Whole-query substring match.
        if normalized in value:
            total += 0.4

        # Moderate credit for individual word hits, capped at three words.
        hits = sum(1 for w in significant if w in value)
        if hits:
            total += 0.1 * min(hits, 3)

    # Very strong matches saturate at 1.0.
    return min(total, 1.0)
153
+
154
+
155
def parallel_vector_search(
    queries: List[str],
    queryset: QuerySet,
    top_k_per_query: int = 5,
    final_top_k: int = 7,
    text_fields: Optional[List[str]] = None
) -> List[Tuple[Any, float]]:
    """
    Run a vector search per query in parallel and merge the results.

    Core of the Query Rewrite Strategy: each rewritten query is searched
    independently and an object keeps the best score it achieved under any
    query.

    Args:
        queries: Rewritten queries (typically 3-5 from Query Rewrite).
        queryset: Django QuerySet to search.
        top_k_per_query: Top K results per individual query (default: 5).
        final_top_k: Top K results after merging (default: 7).
        text_fields: Optional field names for the exact-match boost.

    Returns:
        List of (object, combined_score) tuples sorted by score descending.

    Example:
        queries = [
            "nội dung điều 12",
            "quy định điều 12",
            "điều 12 quy định về"
        ]
        results = parallel_vector_search(queries, LegalSection.objects.all())
        # Returns top 7 sections with highest combined scores
    """
    if not queries or not queries[0].strip():
        return []

    # A single query needs no fan-out machinery.
    if len(queries) == 1:
        return _single_query_search(queries[0], queryset, top_k=final_top_k, text_fields=text_fields)

    best_scores: Dict[Any, float] = {}  # object -> best score across queries

    with ThreadPoolExecutor(max_workers=min(len(queries), 5)) as pool:
        pending = {
            pool.submit(get_vector_scores, queryset, q, top_k=top_k_per_query): q
            for q in queries
        }
        for done in as_completed(pending):
            q = pending[done]
            try:
                # Merge by keeping each object's maximum score.
                for obj, score in done.result():
                    prev = best_scores.get(obj)
                    best_scores[obj] = score if prev is None else max(prev, score)
            except Exception as e:
                logger.warning(f"[PARALLEL_SEARCH] Error searching with query '{q}': {e}")

    if text_fields:
        # Boost exact matches; the first (original-most) query drives the boost.
        merged = [
            (obj, score * 0.8 + calculate_exact_match_boost(obj, queries[0], text_fields) * 0.2)
            for obj, score in best_scores.items()
        ]
    else:
        merged = list(best_scores.items())

    merged.sort(key=lambda pair: pair[1], reverse=True)
    return merged[:final_top_k]
236
+
237
+
238
def _single_query_search(
    query: str,
    queryset: QuerySet,
    top_k: int = 20,
    text_fields: Optional[List[str]] = None
) -> List[Tuple[Any, float]]:
    """
    Vector search for a single query, optionally with exact-match boost.

    Args:
        query: Search query string.
        queryset: Django QuerySet to search.
        top_k: Maximum number of results.
        text_fields: Optional field names for the exact-match boost.

    Returns:
        List of (object, score) tuples sorted by score descending.
    """
    hits = get_vector_scores(queryset, query, top_k=top_k)

    # Without boost fields the raw vector ranking is final.
    if not text_fields:
        return hits[:top_k]

    # Blend: 80% vector similarity, 20% exact keyword match.
    rescored = [
        (obj, vec_score * 0.8 + calculate_exact_match_boost(obj, query, text_fields) * 0.2)
        for obj, vec_score in hits
    ]
    rescored.sort(key=lambda pair: pair[1], reverse=True)
    return rescored[:top_k]
273
+
274
+
275
def pure_semantic_search(
    queries: List[str],
    queryset: QuerySet,
    top_k: int = 20,
    text_fields: Optional[List[str]] = None
) -> List[Any]:
    """
    Pure semantic search (100% vector, no BM25).

    Recommended strategy when combined with Query Rewrite + BGE-M3; dispatches
    to a direct search for one query or a parallel merged search for several.

    Args:
        queries: One query, or 3-5 rewritten queries from Query Rewrite.
        queryset: Django QuerySet to search.
        top_k: Maximum number of results.
        text_fields: Optional field names for the exact-match boost.

    Returns:
        List of objects sorted by score (highest first).

    Usage:
        # Single query
        results = pure_semantic_search(["mức phạt vi phạm"], queryset, top_k=20)

        # Multiple queries (from Query Rewrite)
        rewritten_queries = query_rewriter.rewrite_query("mức phạt vi phạm")
        results = pure_semantic_search(rewritten_queries, queryset, top_k=20)
    """
    if not queries:
        return []

    if len(queries) > 1:
        # Fan out over all rewritten queries and merge.
        scored = parallel_vector_search(
            queries,
            queryset,
            top_k_per_query=max(5, top_k // len(queries)),
            final_top_k=top_k,
            text_fields=text_fields,
        )
    else:
        scored = _single_query_search(queries[0], queryset, top_k=top_k, text_fields=text_fields)

    # Callers only want the ranked objects, not the scores.
    return [obj for obj, _ in scored]
322
+
backend/hue_portal/core/query_rewriter.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Query Rewriter - Rewrite user queries into 3-5 optimized legal queries.
3
+
4
+ This module implements the Query Rewrite Strategy - the "best practice" approach
5
+ used by top legal RAG systems in 2025, achieving >99.9% accuracy.
6
+ """
7
+ import os
8
+ import logging
9
+ import hashlib
10
+ import json
11
+ from typing import List, Dict, Any, Optional
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class QueryRewriter:
    """
    Rewrite user queries into 3-5 optimized legal queries for better search results.

    This is the core of the Query Rewrite Strategy - instead of using an LLM to
    suggest documents (which can hallucinate), we rewrite the query into multiple
    variations and use pure vector search to find the best documents.
    """

    # TTL (seconds) for cached rewrites. The original code referenced an
    # undefined module global ``CACHE_QUERY_REWRITE_TTL`` here, which raised
    # NameError on every cache write; 1 hour matches the cache design notes
    # in redis_cache.py ("Query rewrite results ... TTL 1 hour").
    CACHE_QUERY_REWRITE_TTL = 3600

    def __init__(self, llm_generator=None, use_cache: bool = True):
        """
        Initialize Query Rewriter.

        Args:
            llm_generator: Optional LLMGenerator instance. If None, will get from llm_integration.
            use_cache: Whether to use Redis cache for query rewrites (default: True).
        """
        if llm_generator is None:
            try:
                from hue_portal.chatbot.llm_integration import get_llm_generator
                self.llm_generator = get_llm_generator()
            except Exception as e:
                logger.warning(f"[QUERY_REWRITER] Failed to get LLM generator: {e}")
                self.llm_generator = None
        else:
            self.llm_generator = llm_generator

        # Initialize Redis cache if available; degrade gracefully otherwise.
        self.use_cache = use_cache
        self.cache = None
        if self.use_cache:
            try:
                from hue_portal.core.redis_cache import get_redis_cache
                self.cache = get_redis_cache()
                if not self.cache.is_available():
                    logger.info("[QUERY_REWRITER] Redis cache not available, caching disabled")
                    self.cache = None
            except Exception as e:
                logger.warning(f"[QUERY_REWRITER] Failed to initialize cache: {e}")
                self.cache = None

    def rewrite_query(
        self,
        user_query: str,
        context: Optional[List[Dict[str, str]]] = None,
        max_queries: int = 5,
        min_queries: int = 3
    ) -> List[str]:
        """
        Rewrite a user query into 3-5 optimized legal queries.

        Args:
            user_query: Original user query string.
            context: Optional conversation context (list of {role, content} dicts).
            max_queries: Maximum number of queries to generate (default: 5).
            min_queries: Minimum number of queries to generate (default: 3).

        Returns:
            List of rewritten queries (3-5 queries).

        Examples:
            Input: "điều 12 nói gì"
            Output: [
                "nội dung điều 12",
                "quy định điều 12",
                "điều 12 quy định về",
                "điều 12 quy định gì",
                "điều 12 quy định như thế nào"
            ]
        """
        if not user_query or not user_query.strip():
            return []

        user_query = user_query.strip()

        # Check cache first.
        if self.cache and self.cache.is_available():
            cache_key = f"query_rewrite:{self.get_cache_key(user_query, context=context)}"
            cached_queries = self.cache.get(cache_key)
            if cached_queries and isinstance(cached_queries, list):
                logger.info(f"[QUERY_REWRITER] ✅ Cache hit for query rewrite")
                return cached_queries[:max_queries]

        # Try LLM-based rewrite first.
        if self.llm_generator and self.llm_generator.is_available():
            try:
                rewritten = self._rewrite_with_llm(
                    user_query,
                    context=context,
                    max_queries=max_queries,
                    min_queries=min_queries
                )
                if rewritten and len(rewritten) >= min_queries:
                    logger.info(f"[QUERY_REWRITER] ✅ LLM rewrite: {len(rewritten)} queries")
                    final_queries = rewritten[:max_queries]

                    # Cache the result. FIX: the TTL constant is now a class
                    # attribute; previously this referenced an undefined
                    # global and raised NameError.
                    if self.cache and self.cache.is_available():
                        cache_key = f"query_rewrite:{self.get_cache_key(user_query, context=context)}"
                        self.cache.set(cache_key, final_queries, ttl_seconds=self.CACHE_QUERY_REWRITE_TTL)
                        logger.debug(f"[QUERY_REWRITER] Cached query rewrite (TTL: {self.CACHE_QUERY_REWRITE_TTL}s)")

                    return final_queries
            except Exception as e:
                logger.warning(f"[QUERY_REWRITER] LLM rewrite failed: {e}, using fallback")

        # Fallback to rule-based rewrite.
        return self._rewrite_fallback(user_query, max_queries=max_queries, min_queries=min_queries)

    def _rewrite_with_llm(
        self,
        user_query: str,
        context: Optional[List[Dict[str, str]]] = None,
        max_queries: int = 5,
        min_queries: int = 3
    ) -> List[str]:
        """
        Rewrite query using the LLM.

        Args:
            user_query: Original user query.
            context: Optional conversation context.
            max_queries: Maximum queries to generate.
            min_queries: Minimum queries to generate.

        Returns:
            List of rewritten queries (possibly empty on LLM/parse failure).
        """
        # Summarize the last few user turns as context for the prompt.
        context_text = ""
        if context:
            recent_user_messages = [
                msg.get("content", "")
                for msg in context[-3:]  # Last 3 messages
                if msg.get("role") == "user"
            ]
            if recent_user_messages:
                context_text = " ".join(recent_user_messages)

        # Build the (Vietnamese) rewriting prompt; the LLM must answer in JSON.
        prompt = (
            "Bạn là trợ lý pháp luật chuyên nghiệp. Nhiệm vụ của bạn là viết lại câu hỏi của người dùng "
            "thành {max_queries} câu hỏi chuẩn pháp lý tối ưu nhất để tìm kiếm trong cơ sở dữ liệu văn bản pháp luật.\n\n"
            "Câu hỏi gốc: \"{user_query}\"\n\n"
            "{context_section}"
            "Yêu cầu:\n"
            "1. Viết lại thành {max_queries} câu hỏi khác nhau, mỗi câu hỏi tập trung vào một khía cạnh của vấn đề\n"
            "2. Sử dụng thuật ngữ pháp lý chuẩn (ví dụ: 'quy định', 'điều', 'khoản', 'mức phạt', 'khung hình phạt')\n"
            "3. Các câu hỏi nên bao quát nhiều cách diễn đạt khác nhau của cùng một vấn đề\n"
            "4. Giữ nguyên ý nghĩa chính của câu hỏi gốc\n"
            "5. Mỗi câu hỏi nên ngắn gọn, rõ ràng (10-20 từ)\n\n"
            "Trả về JSON với dạng:\n"
            "{{\n"
            ' "queries": ["câu hỏi 1", "câu hỏi 2", "câu hỏi 3", ...]\n'
            "}}\n"
            "Chỉ in JSON, không thêm lời giải thích khác."
        ).format(
            max_queries=max_queries,
            user_query=user_query,
            context_section=(
                f"Ngữ cảnh cuộc hội thoại: {context_text}\n\n"
                if context_text else ""
            )
        )

        # Generate with the LLM; empty output means "no rewrite available".
        raw = self.llm_generator._generate_from_prompt(prompt)
        if not raw:
            return []

        # Parse the JSON response.
        parsed = self.llm_generator._extract_json_payload(raw)
        if not parsed:
            return []

        queries = parsed.get("queries") or []
        if not isinstance(queries, list):
            return []

        # Keep only non-trivial string entries.
        valid_queries = []
        for q in queries:
            if isinstance(q, str):
                q = q.strip()
                if q and len(q) > 3:  # Minimum length
                    valid_queries.append(q)

        # Top up with the original query and rule-based variations if short.
        if len(valid_queries) < min_queries:
            if user_query not in valid_queries:
                valid_queries.insert(0, user_query)

            # FIX: clamp the requested count at zero so a long valid list can
            # never produce a negative max_queries for the fallback.
            fallback_queries = self._rewrite_fallback(
                user_query,
                max_queries=max(0, max_queries - len(valid_queries)),
                min_queries=0
            )
            valid_queries.extend(fallback_queries)

        # Remove duplicates (case-insensitive) while preserving order.
        seen = set()
        unique_queries = []
        for q in valid_queries:
            q_lower = q.lower()
            if q_lower not in seen:
                seen.add(q_lower)
                unique_queries.append(q)

        return unique_queries[:max_queries]

    def _rewrite_fallback(
        self,
        user_query: str,
        max_queries: int = 5,
        min_queries: int = 3
    ) -> List[str]:
        """
        Fallback rule-based query rewriting.

        Generates query variations using simple patterns when the LLM is not
        available.

        Args:
            user_query: Original user query.
            max_queries: Maximum queries to generate.
            min_queries: Minimum queries to generate.

        Returns:
            List of rewritten queries (original query always first).
        """
        queries = [user_query]  # Always include original

        query_lower = user_query.lower()
        query_words = query_lower.split()

        # Pattern 1: add "quy định" (regulation) variants when absent.
        # FIX: the original tested the same substring twice; one check suffices.
        if "quy định" not in query_lower and len(query_words) > 1:
            queries.append(f"quy định {user_query}")
            queries.append(f"{user_query} quy định")

        # Pattern 2: add "nội dung"/"quy định" variants for article ("điều") queries.
        if "điều" in query_lower:
            for idx, word in enumerate(query_words):
                if "điều" in word:
                    # Use the word after "điều" as the article number, if any.
                    if idx + 1 < len(query_words):
                        next_word = query_words[idx + 1]
                        queries.append(f"nội dung điều {next_word}")
                        queries.append(f"quy định điều {next_word}")
                    break

        # Pattern 3: add penalty ("mức phạt") variants for fine-related queries.
        if any(kw in query_lower for kw in ["phạt", "vi phạm", "xử phạt"]):
            if "mức phạt" not in query_lower:
                queries.append(f"mức phạt {user_query}")
            if "khung hình phạt" not in query_lower:
                queries.append(f"khung hình phạt {user_query}")

        # Pattern 4: add procedure ("thủ tục") variants for procedure queries.
        if any(kw in query_lower for kw in ["thủ tục", "hồ sơ", "giấy tờ"]):
            if "thủ tục" not in query_lower:
                queries.append(f"thủ tục {user_query}")

        # Remove duplicates (case-insensitive) while preserving order.
        seen = set()
        unique_queries = []
        for q in queries:
            q_lower = q.lower()
            if q_lower not in seen:
                seen.add(q_lower)
                unique_queries.append(q)

        # Pad with a reversed-word variation if still below the minimum.
        while len(unique_queries) < min_queries:
            if len(query_words) > 1:
                reversed_query = " ".join(reversed(query_words))
                if reversed_query.lower() not in seen:
                    unique_queries.append(reversed_query)
                    seen.add(reversed_query.lower())
            else:
                break

        return unique_queries[:max_queries]

    def get_cache_key(self, user_query: str, context: Optional[List[Dict[str, str]]] = None) -> str:
        """
        Generate a deterministic cache key for a query rewrite.

        Args:
            user_query: Original user query (normalized to lowercase/stripped).
            context: Optional conversation context; only the last 3 messages
                (truncated to 100 chars each) influence the key.

        Returns:
            Hex SHA-256 digest string (64 characters).
        """
        cache_data = {
            "query": user_query.strip().lower(),
            "context": [
                {"role": msg.get("role"), "content": msg.get("content", "")[:100]}
                for msg in (context or [])[-3:]  # Last 3 messages only
            ]
        }
        cache_str = json.dumps(cache_data, sort_keys=True, ensure_ascii=False)
        return hashlib.sha256(cache_str.encode("utf-8")).hexdigest()
335
+
336
+
337
def get_query_rewriter(llm_generator=None) -> QueryRewriter:
    """
    Build a QueryRewriter instance.

    Args:
        llm_generator: Optional LLMGenerator instance passed through to the
            rewriter; when None the rewriter resolves one itself.

    Returns:
        A freshly constructed QueryRewriter.
    """
    rewriter = QueryRewriter(llm_generator=llm_generator)
    return rewriter
348
+
backend/hue_portal/core/redis_cache.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Redis Cache Layer for Query Rewrite and Prefetch Results.
3
+
4
+ This module provides Redis caching for:
5
+ - Query rewrite results (1000 queries, TTL 1 hour)
6
+ - Prefetch results by document_code (TTL 30 minutes)
7
+
8
+ Supports Upstash and Railway Redis free tier.
9
+ """
10
+ import os
11
+ import logging
12
+ import json
13
+ from typing import Optional, Dict, Any, List
14
+ from datetime import timedelta
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Try to import redis
19
+ try:
20
+ import redis
21
+ REDIS_AVAILABLE = True
22
+ except ImportError:
23
+ REDIS_AVAILABLE = False
24
+ logger.warning("[REDIS] redis package not installed. Install with: pip install redis")
25
+
26
+
27
class RedisCache:
    """
    Redis cache manager for query rewrites and prefetch results.

    Supports graceful degradation if Redis is unavailable: every operation
    becomes a cheap no-op returning a neutral value (None/False/0).
    """

    def __init__(self, redis_url: Optional[str] = None):
        """
        Initialize Redis cache.

        Args:
            redis_url: Redis connection URL. If None, reads from REDIS_URL env var.
        """
        self.redis_url = redis_url or os.environ.get("REDIS_URL")
        # FIX: the annotation is a string so it is never evaluated at runtime.
        # PEP 526 evaluates annotations on attribute targets, so the original
        # `Optional[redis.Redis]` raised NameError whenever the `redis`
        # package was missing — defeating the graceful-degradation design.
        self.client: Optional["redis.Redis"] = None
        self._connected = False

        if not REDIS_AVAILABLE:
            logger.warning("[REDIS] Redis package not available, caching disabled")
            return

        if not self.redis_url:
            logger.warning("[REDIS] REDIS_URL not configured, caching disabled")
            return

        self._connect()

    def _connect(self) -> None:
        """Connect to the Redis server; on failure leave caching disabled."""
        if not REDIS_AVAILABLE or not self.redis_url:
            return

        try:
            # URL format: redis://[:password@]host[:port][/db]
            # (or rediss:// for SSL) — both Upstash and Railway use this form.
            self.client = redis.from_url(
                self.redis_url,
                decode_responses=True,  # Auto-decode strings
                socket_connect_timeout=5,
                socket_timeout=5,
                retry_on_timeout=True,
                health_check_interval=30
            )

            # Fail fast if the server is unreachable.
            self.client.ping()
            self._connected = True
            logger.info("[REDIS] ✅ Connected to Redis successfully")
        except Exception as e:
            logger.warning(f"[REDIS] Failed to connect to Redis: {e}, caching disabled")
            self.client = None
            self._connected = False

    def is_available(self) -> bool:
        """Check if Redis is available and currently responding to PING."""
        if not self._connected or not self.client:
            return False

        try:
            self.client.ping()
            return True
        except Exception:
            # Remember the failure so later calls short-circuit.
            self._connected = False
            return False

    def get(self, key: str) -> Optional[Any]:
        """
        Get a value from the cache.

        Args:
            key: Cache key.

        Returns:
            The cached value (JSON-decoded when possible, raw string
            otherwise) or None if missing/unavailable.
        """
        if not self.is_available():
            return None

        try:
            value = self.client.get(key)
            if value is None:
                return None

            # Values are stored as JSON when possible; fall back to the raw string.
            try:
                return json.loads(value)
            except (json.JSONDecodeError, TypeError):
                return value
        except Exception as e:
            logger.warning(f"[REDIS] Error getting key '{key}': {e}")
            return None

    def set(
        self,
        key: str,
        value: Any,
        ttl_seconds: Optional[int] = None
    ) -> bool:
        """
        Set a value in the cache.

        Args:
            key: Cache key.
            value: Value to cache (JSON-encoded if dict/list, stringified otherwise).
            ttl_seconds: Time to live in seconds. If None, no expiration.

        Returns:
            True if successful, False otherwise.
        """
        if not self.is_available():
            return False

        try:
            # Serialize dict/list values to JSON; everything else via str().
            if isinstance(value, (dict, list)):
                serialized = json.dumps(value, ensure_ascii=False)
            else:
                serialized = str(value)

            if ttl_seconds:
                self.client.setex(key, ttl_seconds, serialized)
            else:
                self.client.set(key, serialized)

            return True
        except Exception as e:
            logger.warning(f"[REDIS] Error setting key '{key}': {e}")
            return False

    def delete(self, key: str) -> bool:
        """
        Delete a key from the cache.

        Args:
            key: Cache key.

        Returns:
            True if the command was issued successfully, False otherwise.
        """
        if not self.is_available():
            return False

        try:
            self.client.delete(key)
            return True
        except Exception as e:
            logger.warning(f"[REDIS] Error deleting key '{key}': {e}")
            return False

    def exists(self, key: str) -> bool:
        """
        Check if a key exists in the cache.

        Args:
            key: Cache key.

        Returns:
            True if the key exists, False otherwise (including on error).
        """
        if not self.is_available():
            return False

        try:
            return self.client.exists(key) > 0
        except Exception:
            return False

    def clear_pattern(self, pattern: str) -> int:
        """
        Clear all keys matching a pattern.

        Args:
            pattern: Redis key pattern (e.g., "query_rewrite:*").

        Returns:
            Number of keys deleted.
        """
        if not self.is_available():
            return 0

        try:
            # FIX: SCAN instead of KEYS — KEYS blocks the Redis server while
            # it walks the whole keyspace, which is unsafe on shared/free-tier
            # instances; SCAN iterates incrementally.
            matched = list(self.client.scan_iter(match=pattern))
            if matched:
                return self.client.delete(*matched)
            return 0
        except Exception as e:
            logger.warning(f"[REDIS] Error clearing pattern '{pattern}': {e}")
            return 0
218
+
219
+
220
+ # Singleton instance
221
+ _redis_cache_instance: Optional[RedisCache] = None
222
+
223
+
224
def get_redis_cache(redis_url: Optional[str] = None) -> RedisCache:
    """
    Get the process-wide RedisCache singleton, creating it on first use.

    Args:
        redis_url: Optional Redis URL used only when the singleton is first
            constructed; otherwise falls back to the REDIS_URL env var.

    Returns:
        The shared RedisCache instance.
    """
    global _redis_cache_instance

    if _redis_cache_instance is not None:
        return _redis_cache_instance

    _redis_cache_instance = RedisCache(redis_url=redis_url)
    return _redis_cache_instance
240
+