# app/services/file_service.py
import os
from typing import List

import pdfplumber


class FileService:
    """
    A service to handle file-related operations, including loading PDFs from a folder.
    """

    def __init__(self, folder_path: str):
        """
        Initialize the FileService with the folder path to read files from.

        :param folder_path: Folder to scan for PDF files. It is stored as an
            absolute path, resolved against the working directory at
            construction time.
        """
        self.folder_path = os.path.abspath(folder_path)

    def load_pdfs(self) -> List[str]:
        """
        Collect the paths of all PDF files located directly in the folder.

        Only regular files are returned (a directory that happens to be named
        ``something.pdf`` is skipped), the ``.pdf`` extension is matched
        case-insensitively, and sub-folders are not searched.

        :return: List of paths to PDF files in the folder, sorted by file name.
        :raises FileNotFoundError: If the folder does not exist, or if it
            contains no PDF files.
        """
        if not os.path.exists(self.folder_path):
            raise FileNotFoundError(f"The folder {self.folder_path} does not exist.")

        pdf_files = []
        # os.listdir() returns entries in arbitrary order; sort the names so
        # callers get the same order on every run and platform.
        for name in sorted(os.listdir(self.folder_path)):
            # Accept ".PDF", ".Pdf", ... as well as ".pdf".
            if not name.lower().endswith(".pdf"):
                continue
            path = os.path.join(self.folder_path, name)
            # Skip directories (or dangling links) whose name ends in ".pdf".
            if os.path.isfile(path):
                pdf_files.append(path)

        if not pdf_files:
            raise FileNotFoundError(f"No PDF files found in the folder {self.folder_path}.")

        return pdf_files

    def extract_text_from_pdf(self, pdf_path) -> str:
        """
        Extracts text from the PDF file using pdfplumber.

        Pages for which ``extract_text()`` returns nothing (``None`` or an
        empty string) are skipped; every other page contributes its text
        followed by a newline.

        :param pdf_path: Path to the PDF file.
        :return: Extracted text as a string (empty if no page yields text).
        """
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = (page.extract_text() for page in pdf.pages)
            # Join once instead of repeated "+=" so the cost stays linear in
            # the number of pages; must run while the PDF is still open.
            return "".join(f"{page_text}\n" for page_text in page_texts if page_text)