"""
Script to update all category index.json files to include recipe names alongside slugs.
Uses multiprocessing for fast parallel processing.
"""

import json
import os
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import time

def get_recipe_title(recipe_path):
    """Extract title from a single recipe JSON file."""
    try:
        with open(recipe_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
            return data.get('title', '')
    except Exception as e:
        print(f"Error reading {recipe_path}: {e}")
        return ''

def process_category(category_path):
    """Process a single category directory."""
    category_name = os.path.basename(category_path)
    index_file = os.path.join(category_path, 'index.json')

    if not os.path.exists(index_file):
        print(f"Skipping {category_name}: no index.json found")
        return

    print(f"Processing category: {category_name}")

    # Read current index.json
    try:
        with open(index_file, 'r', encoding='utf-8') as f:
            current_data = json.load(f)
    except Exception as e:
        print(f"Error reading {index_file}: {e}")
        return

    # Check if already in the new format (a list of objects with slug/title/name keys)
    if not isinstance(current_data, list):
        print(f"  Unexpected format in {index_file}, skipping")
        return
    if current_data and isinstance(current_data[0], dict) and (
            'slug' in current_data[0] or 'title' in current_data[0] or 'name' in current_data[0]):
        print(f"  Category {category_name} already updated, skipping")
        return

    # Old format: a flat array of slug strings that still needs updating
    current_slugs = current_data

    # Get all recipe files in this category
    recipe_files = []
    for slug in current_slugs:
        # Slugs look like "/category_name/filename.json"; take the filename part
        filename = slug.split('/')[-1]
        recipe_path = os.path.join(category_path, filename)
        if os.path.exists(recipe_path):
            recipe_files.append((slug, recipe_path))

    print(f"  Found {len(recipe_files)} recipe files")

    if not recipe_files:
        print(f"  No recipe files found for {category_name}, skipping")
        return

    # Process recipes in parallel using threads for I/O operations
    updated_entries = []
    with ThreadPoolExecutor(max_workers=min(20, len(recipe_files))) as executor:
        future_to_slug = {executor.submit(get_recipe_title, recipe_path): (slug, recipe_path)
                         for slug, recipe_path in recipe_files}

        for future in as_completed(future_to_slug):
            slug, recipe_path = future_to_slug[future]
            try:
                title = future.result()
                updated_entries.append({
                    "slug": slug,
                    "name": title
                })
            except Exception as e:
                print(f"Error processing {recipe_path}: {e}")
                # Keep the slug with an empty name so the entry is not lost
                updated_entries.append({
                    "slug": slug,
                    "name": ""
                })

    # Sort by slug to maintain consistent order
    updated_entries.sort(key=lambda x: x['slug'])

    # Write updated index.json
    try:
        with open(index_file, 'w', encoding='utf-8') as f:
            json.dump(updated_entries, f, ensure_ascii=False, indent=2)
        print(f"  Updated {index_file} with {len(updated_entries)} entries")
    except Exception as e:
        print(f"Error writing {index_file}: {e}")

def main():
    base_path = Path('/var/www/html/receitas/public_html')
    
    # Find all category directories (those containing index.json)
    categories = []
    for item in base_path.iterdir():
        if item.is_dir():
            index_file = item / 'index.json'
            if index_file.exists():
                categories.append(item)
    
    print(f"Found {len(categories)} categories to process")
    
    start_time = time.time()
    
    # Process categories in parallel using multiprocessing
    with mp.Pool(processes=min(mp.cpu_count(), len(categories))) as pool:
        pool.map(process_category, categories)
    
    end_time = time.time()
    print(".2f")

if __name__ == '__main__':
    main()
