#!/usr/bin/env python3
"""
Simple BLE Analysis Script for Picklecon Event
Ready to run on your server; uses only the Python standard library.
"""

import json
import os
from collections import defaultdict, Counter
from datetime import datetime

# Configuration
DATA_DIR = "/var/www/html/CrowdFlow/Picklecon"
EVENT_DATES = ["20250807", "20250808", "20250809", "20250810"]
DAY_NAMES = {
    "20250807": "Thursday",
    "20250808": "Friday",
    "20250809": "Saturday",
    "20250810": "Sunday"
}
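
# Each daily file is assumed to hold a JSON array of per-device records with
# (at least) the fields read below: 'address', 'dwellTime' (minutes),
# 'Location', and 'locationsVisited'. These field names are inferred from this
# script's own usage, not from a formal schema.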

def normalize_address(address):
    """Normalize BLE address to uppercase without colons"""
    # Handle different input types: an integer is taken as the 48-bit numeric
    # value of the address and rendered as 12 hex digits
    if isinstance(address, int):
        address = format(address, '012X')
    elif not isinstance(address, str):
        return ""
    
    # Address might already be normalized (no colons)
    address = str(address).replace(':', '').upper()
    
    # Pad with leading zeros if needed (some addresses may arrive truncated)
    if len(address) < 12 and all(c in "0123456789ABCDEF" for c in address):
        address = address.zfill(12)
    
    return address

def classify_address(address):
    """Classify BLE address based on MSB bits"""
    normalized = normalize_address(address)
    if len(normalized) != 12:
        return "Invalid"
    
    try:
        msb = int(normalized[:2], 16)
    except ValueError:
        # Non-hex characters slipped through normalization
        return "Invalid"
    type_bits = (msb >> 6) & 0x03
    
    if type_bits == 0b11:
        return "Random Static"
    elif type_bits == 0b01:
        return "Random Private Resolvable"
    elif type_bits == 0b00:
        # Indistinguishable from a Public address by the bits alone
        return "Ambiguous (Public or Non-Resolvable)"
    else:  # 0b10 is reserved for random addresses
        return "Reserved/Misclassified"

def load_daily_data(date):
    """Load data for a specific date"""
    filename = os.path.join(DATA_DIR, f"Picklecon_combineddwell_flat_{date}.json")
    
    if not os.path.exists(filename):
        print(f"Warning: File not found - {filename}")
        return []
    
    try:
        with open(filename, 'r') as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading {filename}: {e}")
        return []

def main():
    print("=== BLE EVENT ANALYSIS ===")
    print("Analyzing Picklecon August 7-10, 2025\n")
    
    # Collect all data
    all_addresses = defaultdict(list)  # address -> list of (date, record)
    daily_stats = {}
    
    for date in EVENT_DATES:
        print(f"Loading data for {DAY_NAMES[date]} ({date})...")
        data = load_daily_data(date)
        
        if not data:
            continue
        
        # Process each record
        unique_today = set()
        infrastructure_today = set()  # track addresses, not record counts
        total_dwell = 0
        location_counts = Counter()
        
        for record in data:
            address = normalize_address(record.get('address', ''))
            if not address:  # Skip invalid addresses
                continue
                
            dwell_time = record.get('dwellTime', 0)
            location = record.get('Location', 'Unknown')
            
            unique_today.add(address)
            all_addresses[address].append((date, record))
            
            # Treat dwell times over 8 hours (dwellTime is in minutes) as
            # infrastructure rather than attendees
            if dwell_time > 480:
                infrastructure_today.add(address)
            else:
                total_dwell += dwell_time
                location_counts[location] += 1
        
        # Calculate daily stats; subtract unique devices, not record counts
        infrastructure_count = len(infrastructure_today)
        likely_attendees = len(unique_today) - infrastructure_count
        avg_dwell = total_dwell / likely_attendees if likely_attendees > 0 else 0
        
        daily_stats[date] = {
            'total_unique': len(unique_today),
            'infrastructure': infrastructure_count,
            'likely_attendees': likely_attendees,
            'avg_dwell_time': avg_dwell,
            'location_counts': dict(location_counts)
        }
        
        print(f"  Found {len(unique_today):,} unique devices")
    
    # Analyze multi-day attendance
    print("\n=== MULTI-DAY ATTENDANCE ANALYSIS ===")
    
    attendance_frequency = Counter()
    static_addresses = []
    multi_location_visitors = 0
    
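    # Random Static addresses are the key signal here: they are expected to
    # persist at least between power cycles, so if almost none reappear across
    # days, the likelier explanation is that the scanner is transforming
    # addresses rather than that nobody came back.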
    for address, appearances in all_addresses.items():
        days_attended = len(set(date for date, _ in appearances))
        attendance_frequency[days_attended] += 1
        
        # Collect Random Static addresses for the return-rate check
        classification = classify_address(address)
        if classification == "Random Static":
            static_addresses.append((address, days_attended))
        
        # Count each device once if any of its records span multiple locations
        for _, record in appearances:
            if len(record.get('locationsVisited', [])) > 1:
                multi_location_visitors += 1
                break
    
    # Print attendance frequency
    print("\nAttendance Frequency (all addresses):")
    for days, count in sorted(attendance_frequency.items()):
        print(f"  {days} day(s): {count:,} devices")
    
    # Analyze Random Static addresses specifically
    static_total = len(static_addresses)
    static_multiday = sum(1 for _, days in static_addresses if days > 1)
    static_return_rate = static_multiday / static_total if static_total > 0 else 0.0
    
    print("\nRandom Static Address Analysis:")
    print(f"  Total: {static_total:,}")
    print(f"  Multi-day: {static_multiday:,}")
    if static_total > 0:
        print(f"  Return rate: {static_return_rate * 100:.2f}%")
    else:
        print("  Return rate: N/A")
    
    if static_total > 0 and static_return_rate < 0.05:
        print("\n⚠️  WARNING: Random Static return rate is implausibly low!")
        print("  This indicates addresses are being transformed daily.")
    
    if all_addresses:
        print(f"\nMulti-location visitors: {multi_location_visitors:,} "
              f"({multi_location_visitors / len(all_addresses) * 100:.1f}%)")
    
    # Print daily summary
    print("\n=== DAILY ATTENDANCE SUMMARY ===")
    
    for date in EVENT_DATES:
        if date not in daily_stats:
            continue
            
        stats = daily_stats[date]
        print(f"\n{DAY_NAMES[date]} ({date}):")
        print(f"  Total devices: {stats['total_unique']:,}")
        print(f"  Infrastructure: {stats['infrastructure']:,}")
        print(f"  Likely attendees: {stats['likely_attendees']:,}")
        print(f"  Avg dwell time: {stats['avg_dwell_time']:.1f} minutes")
        
        if stats['location_counts']:
            print("  By location:")
            for location, count in sorted(stats['location_counts'].items(), 
                                        key=lambda x: x[1], reverse=True):
                print(f"    {location}: {count:,}")
    
    # Calculate address entropy (simple version)
    print("\n=== ADDRESS RANDOMNESS CHECK ===")
    
    all_normalized = list(all_addresses)  # keys were normalized on ingest
    
    # Check OUI (first 6 chars) diversity
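    # The first 3 bytes of a public MAC are an IEEE-assigned vendor OUI, so a
    # dataset of genuine public addresses clusters around relatively few
    # prefixes; randomized or hashed addresses spread the prefix space almost
    # uniformly, pushing this ratio toward 1.0.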
    ouis = [addr[:6] for addr in all_normalized if len(addr) == 12]
    unique_ouis = len(set(ouis))
    oui_diversity = unique_ouis / len(ouis) if ouis else 0
    
    print(f"Unique OUI prefixes: {unique_ouis:,}")
    print(f"Total addresses: {len(ouis):,}")
    print(f"OUI diversity: {oui_diversity:.1%}")
    
    if oui_diversity > 0.5:
        print("\n⚠️  High OUI diversity suggests address transformation/hashing")
    
    # Save results
    results = {
        'analysis_date': datetime.now().isoformat(),
        'event_dates': EVENT_DATES,
        'daily_stats': daily_stats,
        'attendance_frequency': dict(attendance_frequency),
        'static_return_rate': static_return_rate,
        'oui_diversity': oui_diversity
    }
    
    output_file = os.path.join(DATA_DIR, 'python_analysis_results.json')
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=2)
    
    print(f"\n✓ Results saved to: {output_file}")
    
    # Final diagnosis
    print("\n=== FINAL DIAGNOSIS ===")
    
    if static_total > 0 and static_return_rate < 0.05 and oui_diversity > 0.5:
        print("🔴 HIGH CERTAINTY: Your scanner system is transforming addresses daily")
        print("   - Random Static addresses should persist but show <5% return rate")
        print("   - Address patterns show hash-like randomness")
        print("   - Multi-day visitor tracking is impossible with current setup")
        print("\n   Recommended action: Contact scanner vendor about privacy mode settings")
    else:
        print("🟡 Results inconclusive - need further investigation")

if __name__ == "__main__":
    main()