Spaces:
Runtime error
Runtime error
Amélioration du support Poppler pour la conversion PDF et la prévisualisation
Browse files
- Dockerfile +15 -3
- app.py +79 -4
Dockerfile
CHANGED
|
@@ -2,12 +2,24 @@ FROM python:3.9
|
|
| 2 |
|
| 3 |
WORKDIR /code
|
| 4 |
|
| 5 |
-
# Installer poppler-utils pour pdf2image
|
| 6 |
-
RUN apt-get update && apt-get install -y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
COPY requirements.txt /code/requirements.txt
|
| 9 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
COPY . /code
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /code
|
| 4 |
|
| 5 |
+
# Installer poppler-utils complet pour pdf2image
|
| 6 |
+
# Install Poppler and verify that pdftoppm is on PATH.
# The error branch must exit non-zero: a bare `|| echo ...` at the end of the
# chain turns ANY earlier failure (apt-get update/install, missing pdftoppm)
# into exit status 0, and the build would continue with a broken image.
RUN apt-get update && apt-get install -y \
    poppler-utils \
    libpoppler-cpp-dev \
    libpoppler-private-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && which pdftoppm && echo "Poppler correctement installé!" \
    || { echo "ERREUR: Poppler non installé correctement!"; exit 1; }
|
| 14 |
|
| 15 |
COPY requirements.txt /code/requirements.txt
|
| 16 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 17 |
|
| 18 |
+
# Vérification de l'installation de pdf2image
|
| 19 |
+
# Abort the build when pdf2image cannot be imported; printing the error alone
# would leave the RUN step with exit status 0 and hide the failure.
RUN python -c "from pdf2image import convert_from_bytes; print('pdf2image est correctement installé!')" \
    || { echo "ERREUR: pdf2image n'est pas installé correctement!"; exit 1; }
|
| 21 |
+
|
| 22 |
COPY . /code
|
| 23 |
|
| 24 |
+
# Commande de démarrage de l'application Gradio
|
| 25 |
+
CMD ["python", "app.py"]
|
app.py
CHANGED
|
@@ -4,9 +4,48 @@ import os
|
|
| 4 |
import json
|
| 5 |
import time
|
| 6 |
import base64
|
|
|
|
| 7 |
from PIL import Image
|
| 8 |
from pdf2image import convert_from_path
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Configuration
|
| 11 |
# NOTE(review): this API key is hard-coded in source and is therefore visible
# to anyone who can read the file. Consider loading it from an environment
# variable / deployment secret and rotating this value.
GOOGLE_API_KEY = "AIzaSyA4ma5pE1pPCzHHn-i9tDWuKqQEgSltMtI"
|
| 12 |
genai.configure(api_key=GOOGLE_API_KEY)
|
|
@@ -919,7 +958,21 @@ def process_document(file, progress=gr.Progress()):
|
|
| 919 |
|
| 920 |
try:
|
| 921 |
if file.name.lower().endswith('.pdf'):
|
| 922 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 923 |
|
| 924 |
if len(images) > 10:
|
| 925 |
return {"error": TEXT["error"]["too_many_pages"]}
|
|
@@ -954,8 +1007,27 @@ def update_preview(file):
|
|
| 954 |
|
| 955 |
if file.name.lower().endswith('.pdf'):
|
| 956 |
try:
|
| 957 |
-
#
|
| 958 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 959 |
image_paths = []
|
| 960 |
|
| 961 |
for i, img in enumerate(images):
|
|
@@ -963,8 +1035,11 @@ def update_preview(file):
|
|
| 963 |
img.save(temp_filename)
|
| 964 |
image_paths.append(temp_filename)
|
| 965 |
|
|
|
|
| 966 |
return image_paths
|
| 967 |
-
except:
|
|
|
|
|
|
|
| 968 |
return []
|
| 969 |
elif file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
| 970 |
return [file.name]
|
|
|
|
| 4 |
import json
|
| 5 |
import time
|
| 6 |
import base64
|
| 7 |
+
import subprocess
|
| 8 |
from PIL import Image
|
| 9 |
from pdf2image import convert_from_path
|
| 10 |
|
| 11 |
+
# Vérification de la disponibilité de Poppler
|
| 12 |
+
def check_poppler():
    """Locate the directory that contains Poppler's ``pdftoppm`` executable.

    A short list of well-known installation directories is probed first. If
    none of them holds the binary, the directories listed in ``PATH`` are
    searched. The outcome (success or installation hints) is printed to
    stdout.

    Returns:
        The directory (str) containing ``pdftoppm``, or ``None`` when the
        executable cannot be found.
    """
    # Imported locally so this function does not require a new file-level import.
    import shutil

    potential_paths = [
        '/usr/bin',
        '/usr/local/bin',
        '/opt/homebrew/bin',
        '/app/bin',
    ]

    for path in potential_paths:
        candidate = os.path.join(path, 'pdftoppm')
        # Require an actual executable file: a directory or a non-executable
        # entry with the same name must not be reported as a working Poppler.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            print(f"✅ Poppler trouvé dans: {path}")
            return path

    # Portable PATH lookup. This replaces spawning the external `which`
    # command (absent on some platforms) and the bare `except:` that used to
    # hide every error raised around it.
    poppler_bin = shutil.which('pdftoppm')
    if poppler_bin:
        poppler_path = os.path.dirname(os.path.abspath(poppler_bin))
        print(f"✅ Poppler trouvé via 'which' dans: {poppler_path}")
        return poppler_path

    print("⚠️ AVERTISSEMENT: Poppler non trouvé dans les chemins standards!")
    print("⚠️ Les fonctionnalités de prévisualisation et de traitement PDF pourraient ne pas fonctionner.")
    print("⚠️ Veuillez installer Poppler:")
    print(" - Linux: apt-get install poppler-utils")
    print(" - macOS: brew install poppler")
    print(" - Windows: Téléchargez depuis https://github.com/oschwartz10612/poppler-windows/")
    return None
|
| 45 |
+
|
| 46 |
+
# Vérifier Poppler au démarrage
|
| 47 |
+
POPPLER_PATH = check_poppler()
|
| 48 |
+
|
| 49 |
# Configuration
|
| 50 |
# NOTE(review): this API key is hard-coded in source and is therefore visible
# to anyone who can read the file. Consider loading it from an environment
# variable / deployment secret and rotating this value.
GOOGLE_API_KEY = "AIzaSyA4ma5pE1pPCzHHn-i9tDWuKqQEgSltMtI"
|
| 51 |
genai.configure(api_key=GOOGLE_API_KEY)
|
|
|
|
| 958 |
|
| 959 |
try:
|
| 960 |
if file.name.lower().endswith('.pdf'):
|
| 961 |
+
# Utiliser le chemin Poppler détecté au démarrage
|
| 962 |
+
if POPPLER_PATH:
|
| 963 |
+
images = convert_from_path(
|
| 964 |
+
file.name,
|
| 965 |
+
poppler_path=POPPLER_PATH,
|
| 966 |
+
use_pdftocairo=True,
|
| 967 |
+
dpi=150
|
| 968 |
+
)
|
| 969 |
+
else:
|
| 970 |
+
print("Trying without poppler_path")
|
| 971 |
+
images = convert_from_path(
|
| 972 |
+
file.name,
|
| 973 |
+
use_pdftocairo=True,
|
| 974 |
+
dpi=150
|
| 975 |
+
)
|
| 976 |
|
| 977 |
if len(images) > 10:
|
| 978 |
return {"error": TEXT["error"]["too_many_pages"]}
|
|
|
|
| 1007 |
|
| 1008 |
if file.name.lower().endswith('.pdf'):
|
| 1009 |
try:
|
| 1010 |
+
# Utiliser le chemin Poppler détecté au démarrage
|
| 1011 |
+
if POPPLER_PATH:
|
| 1012 |
+
images = convert_from_path(
|
| 1013 |
+
file.name,
|
| 1014 |
+
first_page=1,
|
| 1015 |
+
last_page=3,
|
| 1016 |
+
poppler_path=POPPLER_PATH,
|
| 1017 |
+
use_pdftocairo=True,
|
| 1018 |
+
dpi=150
|
| 1019 |
+
)
|
| 1020 |
+
else:
|
| 1021 |
+
# Essayer sans spécifier le chemin, en utilisant des options simplifiées
|
| 1022 |
+
print("Trying without poppler_path")
|
| 1023 |
+
images = convert_from_path(
|
| 1024 |
+
file.name,
|
| 1025 |
+
first_page=1,
|
| 1026 |
+
last_page=3,
|
| 1027 |
+
use_pdftocairo=True,
|
| 1028 |
+
dpi=150
|
| 1029 |
+
)
|
| 1030 |
+
|
| 1031 |
image_paths = []
|
| 1032 |
|
| 1033 |
for i, img in enumerate(images):
|
|
|
|
| 1035 |
img.save(temp_filename)
|
| 1036 |
image_paths.append(temp_filename)
|
| 1037 |
|
| 1038 |
+
print(f"Successfully created {len(image_paths)} preview images")
|
| 1039 |
return image_paths
|
| 1040 |
+
except Exception as e:
|
| 1041 |
+
print(f"Error converting PDF to images: {str(e)}")
|
| 1042 |
+
# On error, return an empty list (no preview images are shown)
|
| 1043 |
return []
|
| 1044 |
elif file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
| 1045 |
return [file.name]
|