/** Size ceiling (5 MiB) for a knowledge-base document read client-side. */
export const MAX_DOCUMENT_BYTES = 5 * 1024 ** 2;
/** Accept string handed to the file picker: the supported document extensions. */
export const DOCUMENT_ACCEPT = '.txt,.csv,.docx';

/**
 * Outcome of {@link extractDocumentText}: text on success, else a message.
 * That function sets ``text`` or ``error``, never both.
 */
export interface DocumentTextResult {
  /** Extracted plain text; set when the file was read successfully. */
  text?: string;
  /** User-facing explanation; set when the file was rejected or could not be read. */
  error?: string;
}

/**
 * Shorten ``text`` to at most ``maxChars`` UTF-16 code units without leaving
 * half of a surrogate pair (e.g. a cut-in-two emoji) at the end.
 *
 * @param text {string} The text to shorten.
 * @param maxChars {number} Upper bound on the result length; negative or NaN counts as 0.
 * @returns {string} ``text`` itself when it already fits, otherwise a prefix of it.
 */
const truncateDocumentText = (text: string, maxChars: number): string => {
  // Clamp first: a negative end index would make slice() count from the END
  // of the string and return almost everything instead of capping it.
  const limit: number = Number.isNaN(maxChars)
    ? 0
    : Math.max(0, Math.floor(maxChars));
  if (limit >= text.length) {
    return text;
  }
  const lastKept: number = text.charCodeAt(limit - 1);
  const firstDropped: number = text.charCodeAt(limit);
  // High surrogate kept + low surrogate dropped means the cut lands inside one
  // character; back off by one unit so the result stays well-formed.
  const splitsPair: boolean =
    limit > 0 &&
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff;
  return text.slice(0, splitsPair ? limit - 1 : limit);
};

/**
 * Read a .txt / .csv / .docx file into plain text, truncated to ``maxChars``.
 * Plain text and CSV are decoded directly; .docx is parsed in-browser with
 * mammoth (loaded on demand). Other types and oversized files return a
 * user-facing error instead of throwing.
 *
 * The extension check is case-insensitive; plain text and CSV are also
 * accepted by MIME type. The text is trimmed before truncation, and a file
 * with nothing left after trimming is reported as empty. Truncation never
 * splits a surrogate pair, so the result may be one unit shorter than
 * ``maxChars``.
 *
 * @param file {File} The selected file.
 * @param maxChars {number} Hard cap on the returned text length; negative or NaN counts as 0.
 * @returns {Promise<DocumentTextResult>} The extracted text or an error message.
 */
export const extractDocumentText = async (
  file: File,
  maxChars: number
): Promise<DocumentTextResult> => {
  // The size is checked before any bytes of the file are read.
  if (file.size > MAX_DOCUMENT_BYTES) {
    return { error: 'That file is over 5 MB. Upload a smaller export.' };
  }
  const name: string = file.name.toLowerCase();
  try {
    let text = '';
    if (name.endsWith('.docx')) {
      // Dynamic import: mammoth is only fetched when a .docx is actually chosen.
      const mammoth = (await import('mammoth')).default;
      const arrayBuffer: ArrayBuffer = await file.arrayBuffer();
      const result = await mammoth.extractRawText({ arrayBuffer });
      text = result.value;
    } else if (
      name.endsWith('.txt') ||
      name.endsWith('.csv') ||
      file.type === 'text/plain' ||
      file.type === 'text/csv'
    ) {
      text = await file.text();
    } else {
      return { error: 'Upload a .txt, .csv or .docx file.' };
    }
    const trimmed: string = text.trim();
    if (!trimmed) {
      return { error: 'That file looks empty - there was no text to read.' };
    }
    return { text: truncateDocumentText(trimmed, maxChars) };
  } catch {
    // Any read or parse failure (including a failed mammoth load) is folded
    // into one generic message so this function never rejects.
    return { error: 'Could not read that file. Paste the text instead.' };
  }
};