import React, {useState} from 'react'
import Tesseract from 'tesseract.js'

export default function OCR(){
    const [imageSrc, setImageSrc] = useState('');
    const [extractedText, setExtractedText] = useState('');

    /**
     * Joins soft-wrapped lines into flowing text while keeping paragraph breaks.
     *
     * A line break is replaced by a single space only when the lines on both
     * sides of it are non-empty. Every break that touches an empty line is kept
     * as-is, so blank lines (paragraph separators) survive unchanged and the
     * following paragraph does not pick up a stray leading space.
     *
     * @param {string} inputString - Text whose lines are separated by '\n'.
     * @returns {string} The text with wrapped lines merged.
     */
    function removeUnwantedLineBreaks(inputString) {
        const lines = inputString.split('\n');
        const lastIndex = lines.length - 1;

        return lines.reduce((result, line, index) => {
            // The last line has no trailing break to decide on
            if (index === lastIndex) {
                return result + line;
            }

            const joinsWrappedText = line.length !== 0 && lines[index + 1].length !== 0;
            return result + line + (joinsWrappedText ? ' ' : '\n');
        }, '');
    }

    /**
     * Shared OCR pipeline for dropped and pasted images.
     *
     * Reads the file as a data URL, shows it as the preview, runs Tesseract on
     * the decoded image and stores the recognised text after passing it through
     * removeUnwantedLineBreaks. Read, decode and recognition failures are
     * reported with console.error instead of being dropped silently.
     *
     * @param {Blob} file - Image file (File or Blob) to recognise.
     */
    const recognizeImageFile = (file) => {
        const reader = new FileReader();

        reader.onerror = () => {
            console.error('Could not read the image file', reader.error);
        };

        reader.onload = () => {
            setImageSrc(reader.result);

            const image = new Image();

            image.onerror = () => {
                console.error('The provided file could not be decoded as an image');
            };

            image.onload = () => {
                Tesseract.recognize(
                    image,
                    'eng',
                    { logger: m => console.log(m) }
                )
                    .then(({ data: { text } }) => {
                        const cleanedText = removeUnwantedLineBreaks(text);
                        console.log(cleanedText);
                        setExtractedText(cleanedText);
                    })
                    .catch((error) => {
                        console.error('OCR failed', error);
                    });
            };

            // Assign src last, once both handlers are attached
            image.src = reader.result;
        };

        reader.readAsDataURL(file);
    };

    /**
     * Drop handler for the dropzone: runs OCR on the first dropped file.
     *
     * Drops that carry no file (for example dragged text) or a file whose MIME
     * type is not an image are ignored.
     *
     * @param {object} e - Drop event exposing `dataTransfer.files`.
     */
    const handleDrop = (e) => {
        e.preventDefault();

        const file = e.dataTransfer.files[0];
        if (!file || !file.type.startsWith('image/')) {
            return;
        }

        recognizeImageFile(file);
    };

    /**
     * Paste handler for the dropzone: runs OCR on the first image found in the
     * clipboard.
     *
     * Only one image is processed so that several recognition jobs never race
     * to overwrite the same preview and text state.
     *
     * @param {object} e - Paste event exposing `clipboardData.items`.
     */
    const handlePaste = (e) => {
        e.preventDefault();

        const imageItem = Array.from(e.clipboardData.items)
            .find((item) => item.type.startsWith('image/'));
        if (!imageItem) {
            return;
        }

        // getAsFile() returns null when the item cannot be exposed as a file
        const file = imageItem.getAsFile();
        if (!file) {
            return;
        }

        recognizeImageFile(file);
    };

    return(
        <>
            <div>
                <h1>Image to Text OCR</h1>
                <div
                    className="dropzone"
                    onDrop={handleDrop}
                    onDragOver={(e) => e.preventDefault()}
                    onPaste={handlePaste}
                    style={{ border: '2px dashed #aaa', padding: '20px', textAlign: 'center' }}
                >
                    {imageSrc ? <img src={imageSrc} alt="Uploaded" style={{ maxWidth: '100%' }} /> : 'Drag and drop an image here'}
                </div>
                {extractedText && (
                    <div style={{ marginTop: '20px' }}>
                    <h2>Extracted Text</h2>
                    <p>{extractedText}</p>
                    <button onClick={()=>navigator.clipboard.writeText(extractedText)}>copy</button>
                    </div>
                )}
            </div>
        </>
    )
}