#!/usr/bin/env python3
import json
import sys
from collections import Counter, defaultdict

# Default log to analyse when no path is given on the command line.
file_path = '/tmp/ecc-observer-analysis.KWPPw1.jsonl'

# Case-insensitive substrings that flag a Bash output as an error.
ERROR_MARKERS = ('error', 'failed')
# Horizontal rule printed under each section heading.
RULE = "=" * 50
# Only patterns seen at least this many times are reported ...
MIN_PATTERN_OCCURRENCES = 3
# ... and only the most common ones are considered at all.
MAX_PATTERNS_CONSIDERED = 15
# Number of Bash errors shown, and characters shown per error.
MAX_ERRORS_SHOWN = 3
ERROR_PREVIEW_CHARS = 200


def _output_text(output):
    """Return a record's ``output`` value as text.

    Strings pass through unchanged, ``None`` becomes the empty string, and
    any other JSON value (object, array, number, ...) is re-serialised so
    it can still be searched and sliced like text.
    """
    if isinstance(output, str):
        return output
    if output is None:
        return ''
    return json.dumps(output)


def analyze_records(lines):
    """Aggregate statistics from an iterable of JSONL lines.

    Each non-blank line must be a JSON object; the keys read are ``tool``,
    ``event``, ``timestamp`` and, for completed Bash calls, ``output``.
    Blank and whitespace-only lines are skipped.

    Returns a 4-tuple ``(tool_counts, event_counts, tool_sequences,
    bash_errors)``:

    * ``tool_counts`` / ``event_counts`` -- ``Counter`` of truthy ``tool``
      and ``event`` values.
    * ``tool_sequences`` -- list of sequences, each a list of
      ``(kind, tool, timestamp)`` tuples with ``kind`` being ``'start'``
      or ``'complete'``.
    * ``bash_errors`` -- list of ``{'timestamp', 'output'}`` dicts for
      completed Bash calls whose output contains one of ``ERROR_MARKERS``.

    Raises ``json.JSONDecodeError`` if a non-blank line is not valid JSON.
    """
    tool_counts = Counter()
    event_counts = Counter()
    tool_sequences = []
    current_seq = []
    bash_errors = []

    for line in lines:
        if not line.strip():
            # Blank lines are not records; json.loads would reject them.
            continue
        record = json.loads(line)

        tool = record.get('tool')
        event = record.get('event')
        timestamp = record.get('timestamp')

        if tool:
            tool_counts[tool] += 1
        if event:
            event_counts[event] += 1

        if event == 'tool_start':
            current_seq.append(('start', tool, timestamp))
        elif event == 'tool_complete':
            # A sequence is closed at every completion; one consisting of
            # the completion alone is discarded.
            # NOTE(review): with strictly alternating start/complete events
            # every sequence holds a single start, so no multi-tool pattern
            # is ever reported -- confirm this is the intended definition.
            current_seq.append(('complete', tool, timestamp))
            if len(current_seq) >= 2:
                tool_sequences.append(current_seq)
            current_seq = []

            if tool == 'Bash':
                output = _output_text(record.get('output', ''))
                lowered = output.lower()
                if any(marker in lowered for marker in ERROR_MARKERS):
                    bash_errors.append({
                        'timestamp': timestamp,
                        'output': output,
                    })

    return tool_counts, event_counts, tool_sequences, bash_errors


def count_sequence_patterns(tool_sequences):
    """Count how often each ordered tuple of started tools occurs.

    Only the ``'start'`` entries of a sequence contribute to its pattern,
    and patterns with fewer than two tools are ignored.
    """
    seq_patterns = Counter()
    for seq in tool_sequences:
        pattern = tuple(entry[1] for entry in seq if entry[0] == 'start')
        if len(pattern) >= 2:
            seq_patterns[pattern] += 1
    return seq_patterns


def build_report(tool_counts, event_counts, tool_sequences, bash_errors) -> str:
    """Render the analysis results as the text report (no trailing newline)."""
    out = ["TOOL USAGE FREQUENCY:", RULE]
    out.extend(f"{tool:25} {count:4d}" for tool, count in tool_counts.most_common())

    out.extend(["\n\nEVENT FREQUENCY:", RULE])
    out.extend(f"{event:25} {count:4d}" for event, count in event_counts.most_common())

    out.extend(["\n\nTOP TOOL SEQUENCES (3+ occurrence patterns):", RULE])
    seq_patterns = count_sequence_patterns(tool_sequences)
    for pattern, count in seq_patterns.most_common(MAX_PATTERNS_CONSIDERED):
        if count >= MIN_PATTERN_OCCURRENCES:
            # str() guards against start records that carried no tool name.
            out.append(f"[{count}x] {' -> '.join(map(str, pattern))}")

    out.append(f"\n\nBASH ERRORS FOUND: {len(bash_errors)}")
    if bash_errors:
        out.append("\nFirst few errors:")
        for number, err in enumerate(bash_errors[:MAX_ERRORS_SHOWN], start=1):
            out.append(f"\n{number}. {err['timestamp']}")
            out.append(f"   {err['output'][:ERROR_PREVIEW_CHARS]}")

    return "\n".join(out)


def main(argv=None):
    """Analyse a JSONL log and print the report.

    ``argv`` is the argument list without the program name (defaults to
    ``sys.argv[1:]``). Its first element, if present, is the log path;
    otherwise ``file_path`` is used.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else file_path
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        results = analyze_records(f)
    print(build_report(*results))


if __name__ == '__main__':
    main()
