# ProductQuery/app/services/file_service.py

# app/services/file_service.py

import os
import pdfplumber

class FileService:
    """
    A service to handle file-related operations: listing the PDF files in a
    folder and extracting their text with pdfplumber.
    """

    def __init__(self, folder_path: str):
        """
        Initialize the FileService with the folder path to read files from.

        :param folder_path: Path to the folder containing the PDF files. It is
            stored as an absolute path (resolved against the current working
            directory at construction time).
        """
        self.folder_path = os.path.abspath(folder_path)

    def load_pdfs(self) -> list:
        """
        Finds all PDF files directly inside the folder and returns their paths.

        The extension match is case-insensitive (``.pdf``, ``.PDF``, ...), only
        regular files are returned (a sub-directory whose name ends in ".pdf"
        is skipped), and the result is sorted by file name so the order is
        deterministic across platforms.

        :return: List of paths (strings) to PDF files in the folder.
        :raises FileNotFoundError: If the folder does not exist or contains no
            PDF files.
        :raises NotADirectoryError: If the path exists but is not a directory.
        """
        if not os.path.exists(self.folder_path):
            raise FileNotFoundError(f"The folder {self.folder_path} does not exist.")
        if not os.path.isdir(self.folder_path):
            # os.listdir would raise this anyway; raise it up front with a
            # message that names the offending path.
            raise NotADirectoryError(f"The path {self.folder_path} is not a folder.")

        pdf_files = []
        # os.listdir order is arbitrary, so sort for reproducible results.
        for name in sorted(os.listdir(self.folder_path)):
            if not name.lower().endswith(".pdf"):
                continue
            path = os.path.join(self.folder_path, name)
            # Skip directories (or other non-files) that merely look like PDFs.
            if os.path.isfile(path):
                pdf_files.append(path)

        if not pdf_files:
            raise FileNotFoundError(f"No PDF files found in the folder {self.folder_path}.")

        return pdf_files

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """
        Extracts text from the PDF file using pdfplumber.

        Pages for which pdfplumber yields no text (``None`` or an empty string)
        are skipped; every remaining page's text is followed by a newline.

        :param pdf_path: Path to the PDF file.
        :return: Extracted text as a string ("" if no page yields any text).
        """
        page_texts = []
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text + "\n")
        # Join once at the end instead of repeated string concatenation.
        return "".join(page_texts)