import openpyxl
from datetime import datetime
import json
from collections import defaultdict
import re

file_path = '/Users/syahrilfadli/Documents/App - Syahril/presensi/backend/storage/DAFTAR_SPPD_2021.xlsx'

wb = openpyxl.load_workbook(file_path, data_only=True)

# Columns mapping (1-indexed)
# Col 6: MAKSUD DAN TUJUAN (keterangan)
# Col 9: TEMPAT TUJUAN (lokasi)
# Col 11: TANGGAL BERANGKAT
# Col 12: TANGGAL KEMBALI
# Col 18: ST (nomor ST)

def parse_date(val):
    """Parse date from various formats"""
    if val is None:
        return None
    if isinstance(val, datetime):
        return val.strftime('%Y-%m-%d')
    if isinstance(val, str):
        # Try common formats
        for fmt in ['%Y-%m-%d', '%d/%m/%Y', '%d-%m-%Y', '%d %B %Y', '%d %b %Y']:
            try:
                return datetime.strptime(val.strip(), fmt).strftime('%Y-%m-%d')
            except:
                continue
    return None

def normalize_location(loc):
    """Normalize location name for consistency"""
    if not loc:
        return None
    loc = str(loc).strip()
    # Remove trailing dots and commas
    loc = re.sub(r'[.,]+$', '', loc)
    # Normalize common variations
    normalizations = {
        r'Perwakilan BPKP Provinsi Bali.*': 'Perwakilan BPKP Provinsi Bali, Denpasar',
        r'Perwakilan BPKP.*Sulawesi Selatan.*': 'Perwakilan BPKP Provinsi Sulawesi Selatan, Makassar',
        r'Perwakilan BPKP.*Sumatera Utara.*': 'Perwakilan BPKP Provinsi Sumatera Utara, Medan',
        r'Mercure.*Cikini.*': 'Mercure Hotel Jakarta Cikini',
    }
    for pattern, replacement in normalizations.items():
        if re.match(pattern, loc, re.IGNORECASE):
            return replacement
    return loc

def extract_sheet_data(ws, sheet_name, filter_func=None):
    """Extract and group ST data from sheet"""
    # Group by: no_st + tempat_tujuan (merge different dates into one range)
    st_groups = defaultdict(lambda: {
        'count': 0,
        'keterangan': None,
        'tempat_tujuan': None,
        'tanggal_berangkat': None,  # Will store earliest date
        'tanggal_kembali': None,    # Will store latest date
        'no_st': None
    })

    skipped = 0
    processed = 0
    filtered = 0

    for row in range(2, ws.max_row + 1):
        no_st = ws.cell(row=row, column=18).value  # ST number
        tempat_tujuan = ws.cell(row=row, column=9).value  # Destination
        tanggal_berangkat = ws.cell(row=row, column=11).value
        tanggal_kembali = ws.cell(row=row, column=12).value
        keterangan = ws.cell(row=row, column=6).value

        # Skip if essential data missing
        if not tempat_tujuan or not tanggal_berangkat:
            skipped += 1
            continue

        # Parse dates
        tgl_berangkat = parse_date(tanggal_berangkat)
        tgl_kembali = parse_date(tanggal_kembali)

        if not tgl_berangkat:
            skipped += 1
            continue

        # Apply custom filter function
        if filter_func:
            berangkat_date = datetime.strptime(tgl_berangkat, '%Y-%m-%d')
            if not filter_func(berangkat_date):
                filtered += 1
                continue

        # Use tanggal_kembali or tanggal_berangkat if kembali is missing
        if not tgl_kembali:
            tgl_kembali = tgl_berangkat

        # Normalize location
        normalized_loc = normalize_location(tempat_tujuan)

        # Extract year from tanggal_berangkat for grouping (ST numbers reset each year)
        year = tgl_berangkat[:4]  # Get year from YYYY-MM-DD format

        # Create group key by year + no_st + location (merge dates into range)
        key = f"{year}|{no_st or 'NO_ST'}|{normalized_loc}"

        st_groups[key]['count'] += 1
        st_groups[key]['keterangan'] = keterangan
        st_groups[key]['tempat_tujuan'] = normalized_loc
        st_groups[key]['no_st'] = str(no_st).strip() if no_st else None

        # Update date range - take earliest berangkat and latest kembali
        if st_groups[key]['tanggal_berangkat'] is None or tgl_berangkat < st_groups[key]['tanggal_berangkat']:
            st_groups[key]['tanggal_berangkat'] = tgl_berangkat
        if st_groups[key]['tanggal_kembali'] is None or tgl_kembali > st_groups[key]['tanggal_kembali']:
            st_groups[key]['tanggal_kembali'] = tgl_kembali

        processed += 1

    print(f"  Processed: {processed}, Skipped: {skipped}, Filtered: {filtered}, Unique ST: {len(st_groups)}")
    return list(st_groups.values())

# Known location coordinates (lat/long from database and Google Maps)
KNOWN_LOCATIONS = {
    'pusdiklatwas': {
        'name': 'Pusdiklatwas BPKP',
        'latitude': -6.64965340,
        'longitude': 106.85379130,
        'address': 'Jl. Beringin II, Pandansari, Kec. Ciawi, Kabupaten Bogor, Jawa Barat 16720'
    },
    'bpkp_sulsel': {
        'name': 'Perwakilan BPKP Provinsi Sulawesi Selatan',
        'latitude': -5.1511764,
        'longitude': 119.4326813,
        'address': 'Jl. Andi Pangeran Pettarani No.10, Makassar, Sulawesi Selatan'
    },
    'bpkp_sumut': {
        'name': 'Perwakilan BPKP Provinsi Sumatera Utara',
        'latitude': 3.5639098,
        'longitude': 98.6576689,
        'address': 'Jl. Jend. Gatot Subroto Km. 5,5, Medan 20122'
    },
    'bpkp_bali': {
        'name': 'Perwakilan BPKP Provinsi Bali',
        'latitude': -8.6478861,
        'longitude': 115.2191175,
        'address': 'Jl. Melati No.5, Dangin Puri Kangin, Denpasar Utara, Denpasar, Bali 80233'
    },
    'bpkp_diy': {
        'name': 'Perwakilan BPKP Provinsi DIY',
        'latitude': -7.8346543,
        'longitude': 110.3747234,
        'address': 'Jl. Parangtritis KM 5,5, Panggungharjo, Sewon, Bantul, DIY 55188'
    },
    'bpkp_jabar': {
        'name': 'Perwakilan BPKP Provinsi Jawa Barat',
        'latitude': -6.9147521,
        'longitude': 107.5573815,
        'address': 'Jl. Jend. H. Amir Machmud No.50, Cibeureum, Cimahi Selatan, Bandung, Jawa Barat'
    },
    'hermitage_jakarta': {
        'name': 'The Hermitage Hotel Jakarta',
        'latitude': -6.1963827,
        'longitude': 106.8372583,
        'address': 'Jl. Cilacap No.1, Menteng, Jakarta Pusat 10310'
    },
    'mercure_cikini': {
        'name': 'Mercure Hotel Jakarta Cikini',
        'latitude': -6.1914178,
        'longitude': 106.8419234,
        'address': 'Jl. Cikini Raya No.66, Cikini, Menteng, Jakarta Pusat 10330'
    },
    'novotel_cikini': {
        'name': 'Novotel Jakarta Cikini',
        'latitude': -6.1943248,
        'longitude': 106.8441127,
        'address': 'Jl. Cikini Raya No.107-109, Cikini, Menteng, Jakarta Pusat 10330'
    },
    'bigland_bogor': {
        'name': 'Bigland Hotel Bogor',
        'latitude': -6.5945181,
        'longitude': 106.7913752,
        'address': 'Jl. Malabar No.1B, Tegallega, Bogor Tengah, Kota Bogor, Jawa Barat 16127'
    },
    'aone_jakarta': {
        'name': 'AONE Hotel Jakarta',
        'latitude': -6.1859473,
        'longitude': 106.8267821,
        'address': 'Jl. K.H. Wahid Hasyim No.80, Menteng, Jakarta Pusat 10340'
    },
    'sidoarjo': {
        'name': 'Sidoarjo, Jawa Timur',
        'latitude': -7.4478433,
        'longitude': 112.7183117,
        'address': 'Sidoarjo, Jawa Timur'
    },
}

def match_known_location(loc_name):
    """Check if location matches known locations"""
    if not loc_name:
        return None
    loc_lower = loc_name.lower()

    # Pusdiklatwas BPKP
    if 'pusdiklat' in loc_lower or 'ciawi' in loc_lower:
        return KNOWN_LOCATIONS['pusdiklatwas']

    # BPKP Perwakilan
    if 'bpkp' in loc_lower or 'perwakilan' in loc_lower:
        if 'sulawesi selatan' in loc_lower or 'makassar' in loc_lower:
            return KNOWN_LOCATIONS['bpkp_sulsel']
        if 'sumatera utara' in loc_lower or 'medan' in loc_lower:
            return KNOWN_LOCATIONS['bpkp_sumut']
        if 'bali' in loc_lower or 'denpasar' in loc_lower:
            return KNOWN_LOCATIONS['bpkp_bali']
        if 'diy' in loc_lower or 'yogyakarta' in loc_lower:
            return KNOWN_LOCATIONS['bpkp_diy']
        if 'jawa barat' in loc_lower or 'jabar' in loc_lower:
            return KNOWN_LOCATIONS['bpkp_jabar']

    # Hotels
    if 'hermitage' in loc_lower:
        return KNOWN_LOCATIONS['hermitage_jakarta']
    if 'mercure' in loc_lower and 'cikini' in loc_lower:
        return KNOWN_LOCATIONS['mercure_cikini']
    if 'novotel' in loc_lower and 'cikini' in loc_lower:
        return KNOWN_LOCATIONS['novotel_cikini']
    if 'bigland' in loc_lower:
        return KNOWN_LOCATIONS['bigland_bogor']
    if 'aone' in loc_lower or 'a-one' in loc_lower or 'a one' in loc_lower:
        return KNOWN_LOCATIONS['aone_jakarta']

    # Sidoarjo
    if 'sidoarjo' in loc_lower:
        return KNOWN_LOCATIONS['sidoarjo']

    return None

# Process each sheet
all_st = []

# Filter: 2024 from September onwards
def filter_2024_september(date):
    return date.year == 2024 and date.month >= 9

# Filter: 2025 data only (some sheets have mixed years)
def filter_2025(date):
    return date.year == 2025

# Filter: 2026 data only
def filter_2026(date):
    return date.year == 2026

# 2024 - start from September
print("Processing DAFTAR 2024 (from September)...")
ws_2024 = wb['DAFTAR 2024']
st_2024 = extract_sheet_data(ws_2024, 'DAFTAR 2024', filter_func=filter_2024_september)
all_st.extend(st_2024)

# 2025 - filter only 2025 dates
print("Processing DAFTAR 2025...")
ws_2025 = wb['DAFTAR 2025']
st_2025 = extract_sheet_data(ws_2025, 'DAFTAR 2025', filter_func=filter_2025)
all_st.extend(st_2025)

# 2026 - filter only 2026 dates
print("Processing DAFTAR 2026...")
ws_2026 = wb['DAFTAR 2026']
st_2026 = extract_sheet_data(ws_2026, 'DAFTAR 2026', filter_func=filter_2026)
all_st.extend(st_2026)

# Sort by tanggal_berangkat
all_st.sort(key=lambda x: x['tanggal_berangkat'] or '0000-00-00')

print(f"\nTotal unique ST records: {len(all_st)}")

# Print sample
print("\n=== Sample ST Data (first 20) ===")
for i, st in enumerate(all_st[:20]):
    print(f"{i+1}. {st['tanggal_berangkat']} - {st['tanggal_kembali']} | {st['tempat_tujuan']} | ST: {st['no_st']} | ({st['count']} orang)")

# Generate PHP seeder data
print("\n\n=== Generating Seeder Data ===")

seeder_data = []
for st in all_st:
    known_loc = match_known_location(st['tempat_tujuan'])

    record = {
        'no_st': st['no_st'],
        'tanggal_mulai': st['tanggal_berangkat'],
        'tanggal_selesai': st['tanggal_kembali'],
        'keterangan': st['keterangan'][:500] if st['keterangan'] and len(st['keterangan']) > 500 else st['keterangan'],
    }

    if known_loc:
        # Use known coordinates from database
        record['custom_location_name'] = known_loc['name']
        record['custom_latitude'] = known_loc['latitude']
        record['custom_longitude'] = known_loc['longitude']
        record['custom_address'] = known_loc['address']
    else:
        # Use custom location without coordinates
        record['custom_location_name'] = st['tempat_tujuan']
        record['custom_latitude'] = None
        record['custom_longitude'] = None
        record['custom_address'] = None

    seeder_data.append(record)

# Save to JSON for seeder
output_file = '/Users/syahrilfadli/Documents/App - Syahril/presensi/backend/storage/st_data.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(seeder_data, f, ensure_ascii=False, indent=2)

print(f"Saved {len(seeder_data)} records to {output_file}")

# Print unique locations
locations = set()
pusdiklat_count = 0
for st in all_st:
    if st['tempat_tujuan']:
        known = match_known_location(st['tempat_tujuan'])
        if known:
            pusdiklat_count += 1
        else:
            locations.add(st['tempat_tujuan'])

print(f"\n=== Pusdiklatwas Records (with lat/long): {pusdiklat_count} ===")
print(f"\n=== Other Locations ({len(locations)}) - Without Coordinates ===")
for loc in sorted(locations):
    print(f"  - {loc}")
