import { parse as csvParse } from "csv-parse/browser/esm/sync";
import { extractDocxText } from "../api/extractDocxText";
import { extractPdfText } from "../api/extractPdfText";

/**
 * Converts an uploaded file into plain text.
 *
 * The format is chosen from the file-name extension (case-insensitive).
 * CSV and TXT files are decoded in-process; PDF and DOCX files are handed
 * to `extractPdfText` / `extractDocxText`.
 */
export class DocumentParser {
  /** Lower-case extensions that `parseDocument` accepts. */
  private static readonly SUPPORTED_TYPES: readonly string[] = [
    "csv",
    "txt",
    "pdf",
    "docx",
  ];

  /**
   * Extracts the text content of `file`.
   *
   * @param file - The file to parse; its name must end in a supported extension.
   * @returns The extracted text.
   * @throws Error if the extension is unsupported, the file cannot be read,
   *   or the format-specific parser fails.
   */
  async parseDocument(file: File): Promise<string> {
    const fileType = this.getFileType(file.name);

    switch (fileType) {
      case "pdf":
        return this.parsePdf(file);
      case "docx":
        return this.parseDocx(file);
      // Only the in-process formats need the raw bytes, so the buffer is
      // read inside these branches rather than up front.
      case "csv":
        return this.parseCsv(await this.getFileBuffer(file));
      case "txt":
        return this.parseTxt(await this.getFileBuffer(file));
      default:
        // Defensive: getFileType already rejects anything not listed in
        // SUPPORTED_TYPES, so this only fires if the two drift apart.
        throw new Error(`Unsupported file type: ${fileType}`);
    }
  }

  /**
   * Returns the lower-cased extension of `fileName`.
   *
   * A name without any "." has no extension; it is not treated as if the
   * whole name were the extension (so a file literally named "csv" is
   * rejected instead of being parsed as CSV).
   *
   * @throws Error if the extension is missing or not in SUPPORTED_TYPES.
   */
  private getFileType(fileName: string): string {
    const dotIndex = fileName.lastIndexOf(".");
    const extension =
      dotIndex === -1 ? "" : fileName.slice(dotIndex + 1).toLowerCase();

    if (!DocumentParser.SUPPORTED_TYPES.includes(extension)) {
      throw new Error(`Unsupported file type: ${extension || "(none)"}`);
    }
    return extension;
  }

  /**
   * Reads the whole file into memory.
   *
   * @throws Error prefixed with "Failed to read file:" if reading fails.
   */
  private async getFileBuffer(file: File): Promise<ArrayBuffer> {
    try {
      return await file.arrayBuffer();
    } catch (error: unknown) {
      throw new Error(
        `Failed to read file: ${DocumentParser.describeError(error)}`,
      );
    }
  }

  /**
   * Decodes `buffer` with the default `TextDecoder` and flattens the CSV:
   * fields of a record are joined with ", " and records with newlines.
   *
   * @throws Error prefixed with "Failed to parse CSV:" if parsing fails.
   */
  private parseCsv(buffer: ArrayBuffer): string {
    try {
      const text = new TextDecoder().decode(buffer);
      const records = csvParse(text, {
        skip_empty_lines: true,
        trim: true,
      });
      return records
        .map((row: string[]) => row.join(", "))
        .join("\n")
        .trim();
    } catch (error: unknown) {
      throw new Error(
        `Failed to parse CSV: ${DocumentParser.describeError(error)}`,
      );
    }
  }

  /**
   * Decodes `buffer` with the default `TextDecoder` and trims surrounding
   * whitespace.
   *
   * @throws Error prefixed with "Failed to parse TXT:" if decoding fails.
   */
  private parseTxt(buffer: ArrayBuffer): string {
    try {
      return new TextDecoder().decode(buffer).trim();
    } catch (error: unknown) {
      throw new Error(
        `Failed to parse TXT: ${DocumentParser.describeError(error)}`,
      );
    }
  }

  /**
   * Delegates PDF text extraction to `extractPdfText`.
   *
   * @throws Error prefixed with "Failed to parse PDF:" if extraction fails.
   */
  private async parsePdf(file: File): Promise<string> {
    try {
      return await extractPdfText({ file });
    } catch (error: unknown) {
      throw new Error(
        `Failed to parse PDF: ${DocumentParser.describeError(error)}`,
      );
    }
  }

  /**
   * Delegates DOCX text extraction to `extractDocxText`.
   *
   * @throws Error prefixed with "Failed to parse DOCX:" if extraction fails.
   */
  private async parseDocx(file: File): Promise<string> {
    try {
      return await extractDocxText({ file });
    } catch (error: unknown) {
      throw new Error(
        `Failed to parse DOCX: ${DocumentParser.describeError(error)}`,
      );
    }
  }

  /**
   * Turns any thrown value into a readable message.
   *
   * Anything can be thrown or used as a rejection reason, so the value is
   * narrowed instead of being assumed to be an `Error`; otherwise a thrown
   * string would be reported as "undefined".
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
