import os
import re
from pathlib import Path

def parse_patent_file(filepath):
    """
    Parse a patent text file and split it into named sections.

    A line starts a new section when, after stripping surrounding whitespace,
    it begins with one of the known headers (matched case-insensitively).
    Whatever follows the header on the same line becomes the first line of
    that section's content. Lines that appear before the first recognised
    header are discarded.

    Args:
        filepath: Path of the text file to read. The file is decoded as
            UTF-8; a leading byte-order mark, if present, is skipped.

    Returns:
        dict: Section name -> content with leading/trailing whitespace
        stripped, keyed in order of first appearance. When a header occurs
        more than once, the content of every occurrence is kept, joined in
        file order.
    """
    # Headers written with a mandatory colon keep their original patterns.
    # Headers that may stand alone get a word boundary (so body text such as
    # "Backgrounds differ" is not mistaken for a header) and an optional
    # colon that is consumed by the pattern instead of leaking into content.
    section_patterns = [
        (re.compile(pattern, re.IGNORECASE), section_name)
        for pattern, section_name in (
            (r'^TITLE:\s*(.*)$', 'TITLE'),
            (r'^Patent Number:\s*(.*)$', 'Patent Number'),
            (r'^INVENTORS:(.*)$', 'INVENTORS'),
            (r'^ABSTRACT:(.*)$', 'ABSTRACT'),
            (r'^DESCRIPTION\b\s*:?\s*(.*)$', 'DESCRIPTION'),
            (r'^CLAIMS:(.*)$', 'CLAIMS'),
            (r'^BACKGROUND\b\s*:?\s*(.*)$', 'BACKGROUND'),
            (r'^SUMMARY\b\s*:?\s*(.*)$', 'SUMMARY'),
            (r'^DETAILED DESCRIPTION\b\s*:?\s*(.*)$', 'DETAILED DESCRIPTION'),
            (r'^FIELD OF THE INVENTION\b\s*:?\s*(.*)$', 'FIELD OF THE INVENTION'),
            (r'^BRIEF DESCRIPTION\b\s*:?\s*(.*)$', 'BRIEF DESCRIPTION'),
        )
    ]

    # Section name -> list of content lines. Re-using the same list when a
    # header repeats means later occurrences extend, rather than replace,
    # what was collected earlier.
    collected = {}
    current_lines = None  # stays None until the first header is seen

    # 'utf-8-sig' reads plain UTF-8 as well, but drops a leading BOM that
    # would otherwise prevent the first header from matching at '^'.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            stripped = raw_line.strip()

            for pattern, section_name in section_patterns:
                match = pattern.match(stripped)
                if match is None:
                    continue

                # Header line: switch to (or resume) this section.
                current_lines = collected.setdefault(section_name, [])
                inline_content = match.group(1).strip()
                if inline_content:
                    current_lines.append(inline_content)
                break
            else:
                # Ordinary line: belongs to the section currently open.
                if current_lines is not None:
                    current_lines.append(raw_line.rstrip())

    return {
        section_name: '\n'.join(content_lines).strip()
        for section_name, content_lines in collected.items()
    }

def create_html_comparison(patent_files, output_filename='patent_comparison.html'):
    """
    Create an HTML file showing patents side by side with aligned sections.

    Each input file is parsed with parse_patent_file(). The resulting table
    has one column per patent, headed by the file name without its extension,
    and one row per section name. Rows follow the order in which section
    names are first encountered while walking the patents in the given order.
    A patent that lacks a section (or has it empty) gets a placeholder cell.

    Args:
        patent_files: Iterable of paths to patent text files. Must contain at
            least one entry.
        output_filename: Path of the HTML file to write, encoded as UTF-8.

    Raises:
        ValueError: If patent_files is empty.
    """
    # Imported locally so this function does not rely on a top-of-file import
    # being added alongside it.
    from html import escape

    patent_files = list(patent_files)
    if not patent_files:
        # The column width below is 100 // len(patent_files); fail with a
        # clear message instead of a ZeroDivisionError.
        raise ValueError("patent_files must contain at least one file to compare")

    # Parse all patent files; the file stem doubles as the column header.
    patents_data = [parse_patent_file(filepath) for filepath in patent_files]
    patent_names = [Path(filepath).stem for filepath in patent_files]

    # Unique section names in order of first appearance across all patents.
    all_sections = list(dict.fromkeys(
        section for patent in patents_data for section in patent
    ))

    column_width = 100 // len(patent_files)

    page_head = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Patent Comparison</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
            background-color: #f5f5f5;
        }
        
        .container {
            max-width: 100%;
            overflow-x: auto;
        }
        
        table {
            width: 100%;
            border-collapse: collapse;
            background-color: white;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        
        th {
            background-color: #2c3e50;
            color: white;
            padding: 15px;
            text-align: left;
            position: sticky;
            top: 0;
            z-index: 10;
            font-size: 14px;
        }
        
        td {
            padding: 15px;
            vertical-align: top;
            border: 1px solid #ddd;
            font-size: 13px;
            line-height: 1.6;
            color: #333;
        }
        
        .section-header {
            background-color: #34495e;
            color: white !important;
            font-weight: bold;
            padding: 10px 15px;
            text-align: center;
        }
        
        .section-content {
            white-space: pre-wrap;
            word-wrap: break-word;
            max-height: 400px;
            overflow-y: auto;
            color: #333;
        }
        
        .empty-section {
            color: #999;
            font-style: italic;
            text-align: center;
        }
        
        .patent-column {
            width: """ + str(column_width) + """%;
            min-width: 300px;
        }
        
        h1 {
            color: #2c3e50;
            text-align: center;
            margin-bottom: 30px;
        }
        
        .section-row:nth-child(even) td:not(.section-header) {
            background-color: #f9f9f9;
        }
        
        .section-row td:not(.section-header) {
            color: #333 !important;
        }
        
        /* Scrollbar styling */
        .section-content::-webkit-scrollbar {
            width: 8px;
        }
        
        .section-content::-webkit-scrollbar-track {
            background: #f1f1f1;
        }
        
        .section-content::-webkit-scrollbar-thumb {
            background: #888;
            border-radius: 4px;
        }
        
        .section-content::-webkit-scrollbar-thumb:hover {
            background: #555;
        }
        
        /* Print styles */
        @media print {
            .section-content {
                max-height: none !important;
                overflow: visible !important;
            }
            th {
                position: static;
            }
        }
    </style>
</head>
<body>
    <h1>Patent Comparison View</h1>
    <div class="container">
        <table>
            <thead>
                <tr>
                    <th style="width: 150px;">Section</th>
"""

    # Collect fragments and join once at the end instead of growing one
    # string with repeated '+='.
    parts = [page_head]

    # Column headers. File names are arbitrary user input, so escape them
    # before placing them in markup.
    parts.extend(
        f'                    <th class="patent-column">{escape(name)}</th>\n'
        for name in patent_names
    )

    parts.append("""                </tr>
            </thead>
            <tbody>
""")

    # One table row per section, one cell per patent.
    for section in all_sections:
        parts.append('                <tr class="section-row">\n')
        parts.append(f'                    <td class="section-header">{escape(section)}</td>\n')

        for patent in patents_data:
            content = patent.get(section, '')
            if content:
                # quote=False escapes exactly &, < and >, which is all that
                # element text requires.
                safe_content = escape(content, quote=False)
                parts.append(f'                    <td><div class="section-content">{safe_content}</div></td>\n')
            else:
                parts.append('                    <td><div class="empty-section">[No content in this section]</div></td>\n')

        parts.append('                </tr>\n')

    parts.append("""            </tbody>
        </table>
    </div>
    
    <script>
        // Add click-to-expand functionality for content cells
        document.querySelectorAll('.section-content').forEach(function(element) {
            element.addEventListener('dblclick', function() {
                if (this.style.maxHeight === 'none') {
                    this.style.maxHeight = '400px';
                } else {
                    this.style.maxHeight = 'none';
                }
            });
        });
    </script>
</body>
</html>
""")

    # Write HTML file
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"Comparison HTML file created: {output_filename}")
    print(f"Compared {len(patent_files)} patents with {len(all_sections)} unique sections")

def main():
    """
    Interactive entry point for the side-by-side patent viewer.

    Prompts for patent file names, one per line, until an empty line is
    entered; at least one file is required. Entries that are not existing
    regular files are rejected and the prompt repeats. Then prompts for the
    output file name, falling back to 'patent_comparison.html' and appending
    '.html' when the name lacks that suffix, and finally calls
    create_html_comparison() with the collected inputs.
    """
    print("=== Patent Text Side-by-Side Viewer ===\n")

    # Get list of patent files to compare
    patent_files = []

    print("Enter the patent text files to compare (one per line).")
    print("Press Enter twice when done:\n")

    while True:
        filename = input("Patent file: ").strip()

        if not filename:
            # A blank entry ends input, but only once something was added.
            if patent_files:
                break
            print("Please enter at least one file.")
            continue

        # isfile rather than exists: a directory "exists" but cannot be
        # opened as a text file later on.
        if not os.path.isfile(filename):
            print(f"Error: File '{filename}' not found. Please try again.")
            continue

        patent_files.append(filename)
        print(f"Added: {filename}")

    if len(patent_files) < 2:
        print("\nWarning: Comparing less than 2 files. Consider adding more files for comparison.")

    # Get output filename
    print("\nEnter output HTML filename (press Enter for default: patent_comparison.html):")
    output_file = input("Filename: ").strip() or "patent_comparison.html"

    # Add .html extension if not present; compare case-insensitively so a
    # name like 'REPORT.HTML' is left alone.
    if not output_file.lower().endswith('.html'):
        output_file += '.html'

    print("\nCreating comparison view...")

    # Create the comparison
    create_html_comparison(patent_files, output_file)

    print(f"\nDone! Open '{output_file}' in your web browser to view the comparison.")
    print("\nTip: Double-click on any content cell to expand/collapse it.")

# Start the interactive viewer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()