import pandas as pd
import os

def extract_and_clean_data(excel_file_path="Sensor VASP Pricelist November 2024 (1).pdf"):
    """
    Extracts and cleans camera and cabling data from an Excel file, handling errors and inconsistencies.

    Args:
        excel_file_path (str): The path to the Excel file.
            Defaults to "Sensor VASP Pricelist November 2024 (1).pdf".

    Returns:
        pd.DataFrame: A cleaned DataFrame containing camera type, sensor product code,
        and lens size, or None if an error occurs.  Also saves the cleaned
        data to a CSV file named "camera_and_cabling_data.csv".
    """
    try:
        # Read the Excel file into a pandas DataFrame
        # Important:  Because the file is actually a PDF, this will fail.
        #  We would need a library like PyPDF2 to extract the data, and even then,
        #  it might not be in a tabular format that pandas can read directly.
        #  For this example, I'm going to simulate reading from a CSV,
        #  assuming you have converted the relevant table to a CSV file.
        #  You'll need to replace this with the actual logic to read your data.
        # df = pd.read_excel(excel_file_path) #  <--  This will likely cause an error
        #
        #  Here's a placeholder for how you might read from a CSV *after*
        #  you've extracted the table from the PDF:
        csv_file_path = "camera_and_cabling_data.csv"  # Replace with your CSV file name
        try:
            df = pd.read_csv(csv_file_path)
        except FileNotFoundError:
            print(f"Error: CSV file '{csv_file_path}' not found.  Please ensure the data is converted to CSV.")
            return None

        # Print the column names to help with debugging
        print("Column names in the Excel/CSV file:")
        print(df.columns)

        # Rename columns to handle potential variations in naming
        df = df.rename(columns={
            "Camera Type": "Camera Type",  # Keep it as is, if it already matches
            "Sensor Product Code": "Sensor Product Code",  # Keep it as is
            "Lens Size": "Lens Size",  # Keep it as is
            "Camera Type ": "Camera Type", #Handling extra space
            "Sensor Product Code ": "Sensor Product Code", #Handling extra space
            "Lens Size ": "Lens Size" #Handling extra space
        })

        # Check if the required columns exist
        required_columns = ["Camera Type", "Sensor Product Code", "Lens Size"]
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
            print("Please ensure the Excel/CSV file has these columns, or adjust the column names in the code.")
            return None

        # Drop rows where any of the required columns have null values
        df = df.dropna(subset=required_columns)

        # Remove leading/trailing whitespace from string columns
        for col in df.columns:
            if df[col].dtype == 'object':  # Check if the column contains strings
                df[col] = df[col].str.strip()

        # Print the first few rows of the cleaned DataFrame
        print("\nCleaned Data:")
        print(df.head().to_markdown(index=False, numalign="left", stralign="left"))

        # Output the cleaned data to a CSV file
        df.to_csv("camera_and_cabling_data.csv", index=False)
        print("\nCleaned data saved to 'camera_and_cabling_data.csv'")
        return df

    except Exception as e:
        print(f"An error occurred: {e}")
        return None  # Return None to indicate an error


def get_price(camera_type, lens_size, data_df=None, excel_file_path="camera_and_cabling_data.csv"):
    """
    Fetches the price for a given camera type and lens size.  This version
    works with the DataFrame created by extract_and_clean_data(), or
    it can read from the CSV file.

    Args:
        camera_type (str): The type of camera.
        lens_size (str): The lens size.
        data_df (pd.DataFrame, optional):  A DataFrame containing the data.
            If None, the function will read from the CSV file.
            Defaults to None.
        excel_file_path (str): only used if data_df is None
    Returns:
        str: The price (from 'Sensor Product Code' column), or a message
        if not found or an error occurs.
    """
    try:
        if data_df is None:
            try:
                df = pd.read_csv(excel_file_path)
            except FileNotFoundError:
                return f"Error: CSV file '{excel_file_path}' not found.  Please run extract_and_clean_data() first."
        else:
            df = data_df.copy()  # Use a copy to avoid modifying the original DataFrame

        # Convert to lowercase and strip spaces for robust matching
        df['Camera Type'] = df['Camera Type'].str.lower().str.strip()
        df['Lens Size'] = df['Lens Size'].str.lower().str.strip()
        camera_type = camera_type.lower().strip()
        lens_size = lens_size.lower().strip()

        # Find the row matching the camera type and lens size
        result = df[(df['Camera Type'] == camera_type) & (df['Lens Size'] == lens_size)]

        if not result.empty:
            # Return the 'Sensor Product Code' which I'm assuming holds the price.
            #  You might need to adjust this column name.
            return result['Sensor Product Code'].iloc[0]
        else:
            return "Price not found for the specified camera and lens size."
    except Exception as e:
        return f"An error occurred: {e}"

def create_index_html(data_df, output_file="index.html"):
    """
    Generates an HTML index page from a pandas DataFrame.

    Args:
        data_df (pd.DataFrame): The DataFrame containing the camera and cabling data.
        output_file (str, optional): The name of the HTML output file.
            Defaults to "index.html".
    """
    if data_df is None or data_df.empty:
        print("Error: No data to generate HTML.")
        return

    # Basic HTML structure
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Camera and Cabling Index</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                margin: 20px;
            }
            table {
                width: 100%;
                border-collapse: collapse;
                margin-top: 20px;
            }
            th, td {
                border: 1px solid #ddd;
                padding: 8px;
                text-align: left;
            }
            th {
                background-color: #f0f0f0;
            }
            h1 {
                color: #333;
            }
            a {
                color: #0078d7;
                text-decoration: none;
            }
            a:hover {
                text-decoration: underline;
            }
        </style>
    </head>
    <body>
        <h1>Camera and Cabling Index</h1>
        <p>
            This page provides an index of cameras and cabling.  Click on a camera type
            to see the details.
        </p>
        <table>
            <thead>
                <tr>
                    <th>Camera Type</th>
                    <th>Lens Size</th>
                    <th>Product Code/Price</th>
                    <th>Details</th>
                </tr>
            </thead>
            <tbody>
    """

    # Group by Camera Type and create rows
    for camera_type, group in data_df.groupby("Camera Type"):
        # Get the first row of the group to display the Camera Type and Lens Size
        first_row = group.iloc[0]
        html_content += f"""
                <tr>
                    <td>{camera_type}</td>
                    <td>{first_row['Lens Size']}</td>
                    <td>{first_row['Sensor Product Code']}</td>
                    <td>
                        <a href="#camera-{camera_type.replace(' ', '-')}">View Details</a>
                    </td>
                </tr>
        """

    html_content += """
            </tbody>
        </table>
    """

    # Add details for each camera type as anchors
    for camera_type, group in data_df.groupby("Camera Type"):
        html_content += f"""
        <h2 id="camera-{camera_type.replace(' ', '-')}">{camera_type}</h2>
        <ul>
        """
        for _, row in group.iterrows():
            html_content += f"""
            <li>
                <strong>Lens Size:</strong> {row['Lens Size']},
                <strong>Product Code/Price:</strong> {row['Sensor Product Code']}
            </li>
            """
        html_content += "</ul>"

    html_content += """
    </body>
    </html>
    """

    # Write the HTML content to a file
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(html_content)
        print(f"Successfully created index page: {output_file}")
    except Exception as e:
        print(f"Error writing to HTML file: {e}")
        return None

def main():
    """
    Main function to orchestrate the data extraction, cleaning, and HTML generation.
    """
    # 1. Extract and clean the data
    cleaned_data_df = extract_and_clean_data()
    if cleaned_data_df is not None:
        # 2. Generate the HTML index page
        create_index_html(cleaned_data_df)

if __name__ == "__main__":
    main()
    def add_navigation_menu(html_content):
        """
        Adds a navigation menu to the HTML content.

        Args:
            html_content (str): The original HTML content.

        Returns:
            str: The HTML content with the navigation menu added.
        """
        navigation_menu = """
        <nav>
            <ul style="list-style-type: none; padding: 0; margin: 0; background-color: #f0f0f0; overflow: hidden;">
                <li style="float: left; margin: 0;">
                    <a href="#top" style="display: block; color: #333; text-align: center; padding: 14px 16px; text-decoration: none;">Home</a>
                </li>
                <li style="float: left; margin: 0;">
                    <a href="#camera-list" style="display: block; color: #333; text-align: center; padding: 14px 16px; text-decoration: none;">Camera List</a>
                </li>
                <li style="float: left; margin: 0;">
                    <a href="#details" style="display: block; color: #333; text-align: center; padding: 14px 16px; text-decoration: none;">Details</a>
                </li>
            </ul>
        </nav>
        """
        # Insert the navigation menu after the opening <body> tag
        return html_content.replace("<body>", f"<body>\n{navigation_menu}")

    # Modify the create_index_html function to include the navigation menu
    def create_index_html(data_df, output_file="index.html"):
        """
        Generates an HTML index page from a pandas DataFrame with a navigation menu.

        Args:
            data_df (pd.DataFrame): The DataFrame containing the camera and cabling data.
            output_file (str, optional): The name of the HTML output file.
                Defaults to "index.html".
        """
        if data_df is None or data_df.empty:
            print("Error: No data to generate HTML.")
            return

        # Basic HTML structure
        html_content = """
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Camera and Cabling Index</title>
            <style>
                body {
                    font-family: Arial, sans-serif;
                    margin: 20px;
                }
                table {
                    width: 100%;
                    border-collapse: collapse;
                    margin-top: 20px;
                }
                th, td {
                    border: 1px solid #ddd;
                    padding: 8px;
                    text-align: left;
                }
                th {
                    background-color: #f0f0f0;
                }
                h1 {
                    color: #333;
                }
                a {
                    color: #0078d7;
                    text-decoration: none;
                }
                a:hover {
                    text-decoration: underline;
                }
            </style>
        </head>
        <body>
            <h1 id="top">Camera and Cabling Index</h1>
            <p>
                This page provides an index of cameras and cabling. Click on a camera type
                to see the details.
            </p>
            <table id="camera-list">
                <thead>
                    <tr>
                        <th>Camera Type</th>
                        <th>Lens Size</th>
                        <th>Product Code/Price</th>
                        <th>Details</th>
                    </tr>
                </thead>
                <tbody>
        """

        # Group by Camera Type and create rows
        for camera_type, group in data_df.groupby("Camera Type"):
            # Get the first row of the group to display the Camera Type and Lens Size
            first_row = group.iloc[0]
            html_content += f"""
                    <tr>
                        <td>{camera_type}</td>
                        <td>{first_row['Lens Size']}</td>
                        <td>{first_row['Sensor Product Code']}</td>
                        <td>
                            <a href="#camera-{camera_type.replace(' ', '-')}">View Details</a>
                        </td>
                    </tr>
            """

        html_content += """
                </tbody>
            </table>
        """

        # Add details for each camera type as anchors
        html_content += '<div id="details">'
        for camera_type, group in data_df.groupby("Camera Type"):
            html_content += f"""
            <h2 id="camera-{camera_type.replace(' ', '-')}">{camera_type}</h2>
            <ul>
            """
            for _, row in group.iterrows():
                html_content += f"""
                <li>
                    <strong>Lens Size:</strong> {row['Lens Size']},
                    <strong>Product Code/Price:</strong> {row['Sensor Product Code']}
                </li>
                """
            html_content += "</ul>"
        html_content += "</div>"

        html_content += """
        </body>
        </html>
        """

        # Add the navigation menu
        html_content = add_navigation_menu(html_content)

        # Write the HTML content to a file
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(html_content)
            print(f"Successfully created index page with navigation menu: {output_file}")
        except Exception as e:
            print(f"Error writing to HTML file: {e}")
            return None