import os
import sys
import json
import requests
import codecs
import re
from io import BytesIO
from img2txt import image_to_text_json
from requests_toolbelt.multipart.decoder import MultipartDecoder

# Ensure local modules can be imported by putting this file's directory first
# on the module search path.
# NOTE(review): this statement runs after the `from img2txt import ...` line
# above, so it cannot influence that import. If img2txt is one of the local
# modules this is meant to expose, the insert presumably needs to happen
# before that import -- confirm.
sys.path.insert(0, os.path.dirname(__file__))

# Inclusive length bounds for the tokens returned to the client.
_MIN_TOKEN_LEN = 4
_MAX_TOKEN_LEN = 6

# (connect, read) timeouts in seconds for downloading a remote image, so a
# slow or unresponsive host cannot block a worker indefinitely.
_DOWNLOAD_TIMEOUT = (5, 30)


def _read_body(environ):
    """Return the raw request body as bytes.

    A missing or empty ``CONTENT_LENGTH`` is treated as an empty body.

    Raises:
        ValueError: if ``CONTENT_LENGTH`` is present but not an integer.
    """
    raw_length = environ.get('CONTENT_LENGTH') or 0
    try:
        length = int(raw_length)
    except (TypeError, ValueError):
        raise ValueError("Invalid Content-Length header.") from None
    if length <= 0:
        # Never call read() with a non-positive size: a negative size means
        # "read until EOF", which a WSGI input stream may never signal.
        return b''
    return environ['wsgi.input'].read(length)


def _parse_image_url(body):
    """Extract the ``url`` field from a JSON object request body.

    Raises:
        ValueError: if the body is not valid UTF-8 JSON, or carries no
            non-empty string ``url`` field.
    """
    try:
        data = json.loads(body.decode('utf-8'))
    except ValueError:
        # Both UnicodeDecodeError and json.JSONDecodeError derive from
        # ValueError, so undecodable bytes and malformed JSON land here.
        raise ValueError("Invalid JSON format in the body.") from None

    image_url = data.get('url') if isinstance(data, dict) else None
    if not image_url or not isinstance(image_url, str):
        raise ValueError("No image URL provided.")
    return image_url


def _download_image(image_url):
    """Download *image_url* and return the response body as bytes.

    Raises:
        RuntimeError: if the server answers with a status other than 200.
    """
    print(f"DEBUG: Image URL received: {image_url}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'image/*',
    }

    # NOTE(security): verify=False disables TLS certificate verification and
    # the URL is supplied by the client, so this endpoint can be pointed at
    # arbitrary hosts. Kept as-is for compatibility; review before exposing
    # this service to untrusted callers.
    response = requests.get(
        image_url,
        headers=headers,
        verify=False,
        stream=True,
        timeout=_DOWNLOAD_TIMEOUT,
    )
    try:
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to download the image. Status code: {response.status_code}"
            )
        image_bytes = response.content
    finally:
        # With stream=True the connection is only released once the body is
        # consumed or the response is closed, so always close it explicitly.
        response.close()

    # Debug aid only: keep a copy of the last downloaded image. A failure to
    # write it must not fail the request.
    try:
        with open("downloaded_fb_image.jpg", "wb") as f:
            f.write(image_bytes)
    except OSError as exc:
        print(f"DEBUG: Could not save debug copy of image: {exc}")

    return image_bytes


def _extract_uploaded_file(body, content_type):
    """Return the content of the first file part of a multipart body.

    A part counts as a file when its Content-Disposition header contains
    ``filename=``.

    Raises:
        ValueError: if there is no file part or the first one is empty.
    """
    decoder = MultipartDecoder(body, content_type)
    image_bytes = next(
        (
            part.content
            for part in decoder.parts
            if 'filename=' in part.headers.get(b'Content-Disposition', b'').decode()
        ),
        None,
    )
    if not image_bytes:
        raise ValueError("No file uploaded.")
    return image_bytes


def _extract_tokens(text):
    """Reduce OCR text to a de-duplicated list of short alphanumeric tokens.

    Each non-blank line is stripped of every character outside
    ``[a-zA-Z0-9]`` and cut down to its last ``_MAX_TOKEN_LEN`` characters;
    results shorter than ``_MIN_TOKEN_LEN`` are dropped. First-seen order is
    preserved.
    """
    stripped_lines = (line.strip() for line in text.split('\n'))
    alnum_only = (
        re.sub(r'[^a-zA-Z0-9]', '', line) for line in stripped_lines if line
    )
    # Slicing with a negative start leaves shorter strings untouched.
    trimmed = (token[-_MAX_TOKEN_LEN:] for token in alnum_only)
    kept = (
        token for token in trimmed
        if _MIN_TOKEN_LEN <= len(token) <= _MAX_TOKEN_LEN
    )
    # dict preserves insertion order, giving an order-stable de-duplication.
    return list(dict.fromkeys(kept))


def app(environ, start_response):
    """WSGI entry point: run OCR on a posted image and return short tokens.

    POST requests are handled in one of two ways, chosen by Content-Type:

    * ``multipart/*``: the first uploaded file part is used as the image.
    * anything else: the body must be a JSON object with a ``url`` field;
      the image is downloaded from that URL.

    The image bytes are passed to ``image_to_text_json``; the ``text`` field
    of its JSON result is reduced to tokens by ``_extract_tokens`` and
    returned as ``{"text": [...]}``.

    Any failure while handling a POST is reported as ``400 Bad Request`` with
    a JSON body ``{"error": "<message>"}``. Non-POST requests get a plain-text
    usage hint with status 200.

    Args:
        environ: The WSGI environment dict.
        start_response: The WSGI ``start_response`` callable.

    Returns:
        A single-element list containing the response body as bytes.
    """
    method = environ.get('REQUEST_METHOD', '')
    path = environ.get('PATH_INFO', '')
    print("DEBUG: PATH_INFO =", path)
    print("DEBUG: REQUEST_METHOD =", method)

    if method.upper() != 'POST':
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return [b"Use POST with a file or image URL to extract image text."]

    try:
        content_type = environ.get('CONTENT_TYPE', '')
        body = _read_body(environ)

        # Route on the declared content type rather than sniffing the body:
        # an uploaded file may legitimately contain the bytes "url".
        if content_type.strip().lower().startswith('multipart/'):
            image_bytes = _extract_uploaded_file(body, content_type)
        else:
            image_bytes = _download_image(_parse_image_url(body))

        # image_to_text_json is expected to return a JSON string whose
        # top-level "text" field holds the recognised text.
        ocr_payload = json.loads(image_to_text_json(image_bytes))
        ocr_text = ocr_payload.get('text')
        if not ocr_text:
            raise ValueError("No 'text' field found in OCR response.")

        response_body = json.dumps(
            {"text": _extract_tokens(ocr_text)}
        ).encode('utf-8')

    except Exception as e:
        # Top-level boundary: every failure is turned into a JSON 400 reply.
        print(f"DEBUG: Error occurred: {str(e)}")
        error_json = json.dumps({"error": str(e)})
        start_response('400 Bad Request', [('Content-Type', 'application/json')])
        return [error_json.encode('utf-8')]

    start_response('200 OK', [('Content-Type', 'application/json')])
    return [response_body]