"""
Load places from the LD2/LD3 lab databases into the Django application database.

Sources
-------
LD2  databases/bigdata_ld2_database.sqlite3
       final_features   — OSM tags (historic, tourism, amenity, shop, leisure,
                          man_made, memorial, artwork_type, heritage) for all cities
       intermediate_data — raw OSM tags including website, phone, opening_hours

LD3  databases/ld3_claude.sqlite3
       ld3_object       — PageRank scores + Wikidata labels/descriptions for ALL cities

Cities loaded: Berlin, London, New York, Paris

Usage:
    python manage.py load_data
    python manage.py load_data --clear
"""
import os
import re
import sqlite3
from django.core.management.base import BaseCommand
from django.conf import settings
from django.db import transaction


CITY_SLUGS = {
    'Berlin':   'berlin',
    'London':   'london',
    'New York': 'new_york',
    'Paris':    'paris',
}


def _slugify(name):
    return CITY_SLUGS.get(name, re.sub(r'[^a-z0-9]+', '_', name.lower()).strip('_'))


class Command(BaseCommand):
    help = 'Load cities and places from LD2/LD3 lab databases'

    def add_arguments(self, parser):
        parser.add_argument('--clear', action='store_true',
                            help='Delete all existing data before loading')

    def handle(self, *args, **options):
        from places.models import City, Category, Place, PlaceCategory, ImageFeature, PlaceImage

        ld2_path = getattr(settings, 'LD2_DB_PATH', '')
        ld3_path = getattr(settings, 'LD3_DB_PATH', '')

        if not os.path.exists(ld2_path):
            self.stderr.write(f'LD2 database not found: {ld2_path}')
            return
        if not os.path.exists(ld3_path):
            self.stderr.write(f'LD3 database not found: {ld3_path}')
            return

        if options['clear']:
            self.stdout.write('Clearing existing data...')
            ImageFeature.objects.all().delete()
            PlaceImage.objects.all().delete()
            PlaceCategory.objects.all().delete()
            Place.objects.all().delete()
            Category.objects.all().delete()
            City.objects.all().delete()
            self.stdout.write(self.style.SUCCESS('Cleared.'))

        ld2 = sqlite3.connect(ld2_path)
        ld2.row_factory = sqlite3.Row
        ld3 = sqlite3.connect(ld3_path)
        ld3.row_factory = sqlite3.Row

        # ---- 1. Build PageRank lookup from ld3_object (ALL cities) ----
        self.stdout.write('Loading PageRank and Wikidata enrichment from LD3...')
        ld3_cur = ld3.cursor()
        ld3_cur.execute(
            'SELECT city_name, osm_id, osm_type, pagerank, wd_label_en, wd_description_en '
            'FROM ld3_object'
        )
        ld3_map = {}  # (city_name, osm_id, osm_type) -> row
        for row in ld3_cur.fetchall():
            key = (row['city_name'], row['osm_id'], row['osm_type'])
            ld3_map[key] = row
        self.stdout.write(f'  {len(ld3_map)} LD3 entries loaded')

        # ---- 2. Build contact-info lookup from intermediate_data ----
        self.stdout.write('Loading contact info from LD2 intermediate_data...')
        ld2_cur = ld2.cursor()
        ld2_cur.execute(
            'SELECT city_name, osm_id, osm_type, tag_website, tag_phone, tag_opening_hours '
            'FROM intermediate_data'
        )
        contact_map = {}  # (city_name, osm_id, osm_type) -> row
        for row in ld2_cur.fetchall():
            key = (row['city_name'], row['osm_id'], row['osm_type'])
            contact_map[key] = row
        self.stdout.write(f'  {len(contact_map)} contact entries loaded')

        # ---- 3. Load places from final_features ----
        self.stdout.write('Loading places from LD2 final_features...')
        ld2_cur.execute('SELECT * FROM final_features ORDER BY city_name, id')
        all_rows = ld2_cur.fetchall()

        # Group by city
        city_rows = {}
        for row in all_rows:
            city_rows.setdefault(row['city_name'], []).append(row)

        tag_fields = ['historic', 'tourism', 'amenity', 'shop', 'leisure',
                      'man_made', 'memorial', 'artwork_type', 'heritage']

        total_places = 0

        for city_name, rows in city_rows.items():
            slug = _slugify(city_name)
            city, _ = City.objects.update_or_create(
                slug=slug,
                defaults={'name': city_name, 'bbox': None}
            )
            self.stdout.write(self.style.MIGRATE_HEADING(
                f'\n=== {city_name}: {len(rows)} places ==='))

            # Pre-create categories
            cat_names = set()
            for row in rows:
                for field in tag_fields:
                    val = row[field]
                    if val:
                        cat_names.add(f'{field}:{val}')
            for cat_name in cat_names:
                Category.objects.get_or_create(name=cat_name)

            # Collect raw PageRank values for normalisation
            pr_values = []
            for row in rows:
                key = (city_name, row['osm_id'], row['osm_type'])
                ld3_row = ld3_map.get(key)
                if ld3_row and ld3_row['pagerank'] is not None:
                    pr_values.append(ld3_row['pagerank'])

            pr_min = min(pr_values) if pr_values else 0.0
            pr_max = max(pr_values) if pr_values else 1.0
            pr_range = pr_max - pr_min if pr_max > pr_min else 1.0

            self.stdout.write(
                f'  PageRank: {len(pr_values)} entries, '
                f'range [{pr_min:.6f}, {pr_max:.6f}]'
            )

            place_count = 0
            with transaction.atomic():
                for row in rows:
                    key = (city_name, row['osm_id'], row['osm_type'])
                    ld3_row = ld3_map.get(key)
                    contact_row = contact_map.get(key)

                    # Name: prefer Wikidata English label
                    name = (
                        (ld3_row['wd_label_en'] if ld3_row and ld3_row['wd_label_en'] else None)
                        or row['name']
                        or ''
                    )
                    if not name:
                        continue

                    # Description from Wikidata
                    description = ld3_row['wd_description_en'] if ld3_row else None

                    # Contact info
                    website = opening_hours = phone = None
                    if contact_row:
                        website = (contact_row['tag_website'] or '')[:500] or None
                        phone = contact_row['tag_phone']
                        opening_hours = (contact_row['tag_opening_hours'] or '')[:500] or None

                    # Interest score from normalised PageRank
                    if ld3_row and ld3_row['pagerank'] is not None:
                        interest_score = (ld3_row['pagerank'] - pr_min) / pr_range
                    else:
                        # Heuristic fallback for places without PageRank
                        score = 0.0
                        if row['wikidata_qid']:
                            score += 0.5
                        if row['tourism']:
                            score += 0.3
                        if row['historic']:
                            score += 0.2
                        interest_score = min(score, 1.0)

                    place, _ = Place.objects.update_or_create(
                        city=city,
                        osm_id=row['osm_id'],
                        osm_type=row['osm_type'] or 'node',
                        defaults={
                            'name': name[:500],
                            'lat': row['lat'] or 0.0,
                            'lon': row['lon'] or 0.0,
                            'wikidata_qid': row['wikidata_qid'],
                            'description': description,
                            'website': website,
                            'phone': phone,
                            'opening_hours': opening_hours,
                            'historic': row['historic'],
                            'tourism': row['tourism'],
                            'amenity': row['amenity'],
                            'shop': row['shop'],
                            'leisure': row['leisure'],
                            'man_made': row['man_made'],
                            'memorial': row['memorial'],
                            'artwork_type': row['artwork_type'],
                            'heritage': row['heritage'],
                            'pagerank': ld3_row['pagerank'] if ld3_row else None,
                            'interest_score': interest_score,
                        }
                    )

                    # Link categories
                    for field in tag_fields:
                        val = row[field]
                        if val:
                            try:
                                cat = Category.objects.get(name=f'{field}:{val}')
                                PlaceCategory.objects.get_or_create(place=place, category=cat)
                            except Category.DoesNotExist:
                                pass

                    place_count += 1

            city.place_count = place_count
            city.save()
            total_places += place_count
            self.stdout.write(self.style.SUCCESS(
                f'  Loaded {place_count} places for {city_name}'))

        ld2.close()
        ld3.close()

        self.stdout.write(self.style.SUCCESS(
            f'\nDone! {total_places} places across {len(city_rows)} cities.'))
