#!/usr/bin/env python3
"""
Fast parallel recipe indexer
Scans all category index.json files and builds a searchable index
"""

import json
import os
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import unicodedata
from threading import Lock

class RecipeIndexBuilder:
    def __init__(self, base_path):
        self.base_path = Path(base_path)
        self.recipes = []
        self.lock = Lock()
        self.processed = 0
        
    def normalize_text(self, text):
        """Remove accents and convert to lowercase for better search"""
        if not isinstance(text, str):
            return ""
        nfkd = unicodedata.normalize('NFKD', text.lower())
        return ''.join([c for c in nfkd if not unicodedata.combining(c)])
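    # Illustrative example: normalize_text("Crème Brûlée") returns "creme brulee".
    # NFKD decomposition splits accented characters into base letters plus
    # combining marks, and the combining marks are then dropped.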
    
    def load_category_recipes(self, category_path):
        """Load recipes from a single category's index.json"""
        try:
            index_file = category_path / 'index.json'
            if not index_file.exists():
                return []
            
            with open(index_file, 'r', encoding='utf-8') as f:
                recipes_data = json.load(f)
            
            recipes = []
            category_name = category_path.name
            
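            # Assumed shape of each category's index.json, inferred from the
            # checks below: a JSON array of objects carrying at least "name"
            # and "slug", e.g. [{"name": "Bolo de Fubá", "slug": "bolo-de-fuba"}]
            # (the example entry is hypothetical). Entries missing either key
            # are skipped.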
            for recipe in recipes_data:
                if isinstance(recipe, dict) and 'name' in recipe and 'slug' in recipe:
                    normalized_name = self.normalize_text(recipe['name'])
                    recipes.append({
                        'name': recipe['name'],
                        'normalized': normalized_name,
                        'category': category_name,
                        'slug': recipe['slug']
                    })
            
            return recipes
        except Exception as e:
            print(f"Error processing {category_path}: {e}")
            return []
    
    def process_all_categories(self, max_workers=8):
        """Scan all category directories in parallel"""
        print(f"Starting parallel scan of {self.base_path}...")
        start_time = time.time()
        
        # Get all category directories
        category_dirs = [d for d in self.base_path.iterdir() 
                        if d.is_dir() and (d / 'index.json').exists()]
        
        print(f"Found {len(category_dirs)} categories to process")
        
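        # Threads (rather than processes) are a good fit here: each task is
        # I/O-bound, just reading and parsing one JSON file.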
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {executor.submit(self.load_category_recipes, cat_dir): cat_dir 
                      for cat_dir in category_dirs}
            
            for future in as_completed(futures):
                recipes = future.result()
                with self.lock:
                    self.recipes.extend(recipes)
                    self.processed += 1
                    done = self.processed

                # Report progress every 50 completed categories
                if done % 50 == 0:
                    print(f"Processed {done} categories... "
                          f"({len(self.recipes)} recipes so far)")
        
        elapsed = time.time() - start_time
        print(f"\nCompleted in {elapsed:.2f}s")
        print(f"Total recipes indexed: {len(self.recipes)}")
    
    def save_index(self, output_file):
        """Save the index to a JSON file for fast searching"""
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        
        # Sort by the accent-insensitive name for a stable, predictable order
        # (raw code-point order would push accented names past 'z')
        sorted_recipes = sorted(self.recipes, key=lambda x: (x['normalized'], x['name']))
        
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(sorted_recipes, f, ensure_ascii=False, indent=2)
        
        print(f"Index saved to {output_path}")
        print(f"File size: {output_path.stat().st_size / 1024:.2f} KB")

def main():
    base_path = '/var/www/html/receitas/public_html'
    output_file = '/var/www/html/receitas/public_html/search_index.json'
    
    builder = RecipeIndexBuilder(base_path)
    builder.process_all_categories(max_workers=16)  # Use 16 threads for faster processing
    builder.save_index(output_file)
    
    print("\n✓ Search index built successfully!")

if __name__ == '__main__':
    main()
