FastAPI server for online PDF processing

replacement.py 2.1KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  import base64
  import json
  import os
  import zipfile
  from glob import iglob
  from tempfile import TemporaryDirectory

  from fastapi import APIRouter, HTTPException
  from pydantic import BaseModel

  from core.config import settings
  from pdf.processor import replace_texts
  # Router on which this module's endpoints are registered (see the @router.post decorator below).
  11. router = APIRouter()
  # Request body for the text-replacement endpoint. Both payload fields travel
  # as base64 text inside the JSON request.
  class ReplacementRequest(BaseModel):
      # Base64 encoded zip file
      pdf_zip: str

      # Base64 encoded JSON string
      replacement_data: str

      # Passed through unchanged to the PDF text replacer; enabled unless the
      # client explicitly sends false.
      preserve_original_fonts: bool = True
  @router.post("/replacement")
  async def replacement(request: ReplacementRequest):
      """Replace text in every PDF of an uploaded zip archive.

      The request carries a base64-encoded zip archive (``pdf_zip``) and a
      base64-encoded JSON object (``replacement_data``) keyed by PDF file
      name. Each ``*.pdf`` file found at the top level of the archive is
      processed with the entry stored under its file name, and the processed
      files are returned as a base64-encoded zip archive.

      Returns:
          A dict with ``status`` set to ``'success'`` and ``data`` holding the
          base64-encoded output archive.

      Raises:
          HTTPException: with status 400 when a payload is not valid base64 /
              UTF-8 / JSON, when ``replacement_data`` is not a JSON object,
              when ``pdf_zip`` is not a valid zip archive, or when a PDF in
              the archive has no entry in ``replacement_data``.
      """
      # Decode both payloads before touching the filesystem. binascii.Error,
      # UnicodeDecodeError and json.JSONDecodeError are all ValueError
      # subclasses, so one handler covers every malformed-payload case.
      try:
          pdf_zip = base64.b64decode(request.pdf_zip)
          replacement_data = json.loads(base64.b64decode(request.replacement_data).decode('utf-8'))
      except ValueError as exc:
          raise HTTPException(
              status_code=400,
              detail='pdf_zip and replacement_data must be valid base64; '
                     'replacement_data must decode to UTF-8 JSON',
          ) from exc

      # The per-file lookup below needs a mapping; a JSON list or scalar would
      # otherwise fail later with an unrelated TypeError.
      if not isinstance(replacement_data, dict):
          raise HTTPException(
              status_code=400,
              detail='replacement_data must be a JSON object keyed by PDF file name',
          )

      # NOTE(review): the file I/O and replace_texts calls below are synchronous
      # calls made from inside this coroutine - confirm that is acceptable for
      # the expected PDF sizes.
      with TemporaryDirectory(dir=settings.BASE_DIR, prefix='tmp_', ignore_cleanup_errors=True) as temp_dir:
          # Save the zip file to the temporary directory
          pdf_zip_path = os.path.join(temp_dir, 'pdf.zip')
          with open(pdf_zip_path, 'wb') as f:
              f.write(pdf_zip)

          # Extract the PDFs from the zip file
          pdf_dir = os.path.join(temp_dir, 'pdf')
          output_dir = os.path.join(temp_dir, 'output')
          os.makedirs(output_dir, exist_ok=True)
          try:
              with zipfile.ZipFile(pdf_zip_path, 'r') as zip_ref:
                  zip_ref.extractall(pdf_dir)
          except zipfile.BadZipFile as exc:
              raise HTTPException(
                  status_code=400,
                  detail='pdf_zip is not a valid zip archive',
              ) from exc

          # Sorted so that processing order and archive member order do not
          # depend on filesystem enumeration order.
          pdf_paths = sorted(iglob(os.path.join(pdf_dir, '*.pdf')))

          # Check every file has replacement data before doing any work, so a
          # missing entry is reported as a client error instead of a KeyError
          # halfway through the batch.
          missing = [
              os.path.basename(pdf_path)
              for pdf_path in pdf_paths
              if os.path.basename(pdf_path) not in replacement_data
          ]
          if missing:
              raise HTTPException(
                  status_code=400,
                  detail='replacement_data has no entry for: ' + ', '.join(missing),
              )

          # Replace text in the PDFs
          for pdf_path in pdf_paths:
              pdf_name = os.path.basename(pdf_path)
              output_path = os.path.join(output_dir, pdf_name)
              replace_texts(pdf_path, output_path, replacement_data[pdf_name], request.preserve_original_fonts)

          # Zip the output PDFs
          output_zip_path = os.path.join(temp_dir, 'output.zip')
          with zipfile.ZipFile(output_zip_path, 'w') as zip_ref:
              for output_pdf_path in sorted(iglob(os.path.join(output_dir, '*.pdf'))):
                  zip_ref.write(output_pdf_path, os.path.basename(output_pdf_path))

          # Read the archive while the temporary directory still exists; the
          # context manager closes the handle even if the read fails.
          with open(output_zip_path, 'rb') as zip_file:
              data = zip_file.read()

      return {
          'status': 'success',
          'data': base64.b64encode(data).decode('utf-8'),
      }