"""Gradio app: replace red-colored text in a Word file with PDF content.

The Word document's red runs are fuzzy-matched (via ``difflib``) against text
extracted from the uploaded PDF; matched runs are overwritten and recolored
black, and the result is saved to a new ``.docx`` for download.
"""

import difflib
import os
import shutil
import tempfile
from itertools import chain
from typing import Optional

import gradio as gr
from docx import Document
from docx.shared import RGBColor

from pdf_extractor import extract_text_pdf_raw
from word_extractor import extract_red_text_with_labels, is_red_font

# Minimum difflib similarity ratio (0..1) for two strings to count as a match.
MATCH_CUTOFF = 0.5


def find_best_match(target, candidates) -> Optional[str]:
    """Return the candidate most similar to *target*, or ``None``.

    Args:
        target: The string to look up.
        candidates: Iterable of strings to compare against. Pass a list of
            strings, not one big string: ``difflib`` iterates this argument,
            so a bare ``str`` would be compared character by character.

    Returns:
        The single closest candidate whose similarity ratio is at least
        ``MATCH_CUTOFF``, or ``None`` when nothing is close enough.
    """
    matches = difflib.get_close_matches(
        target, candidates, n=1, cutoff=MATCH_CUTOFF
    )
    return matches[0] if matches else None


def _iter_paragraphs(doc):
    """Yield the document's body paragraphs, then every table-cell paragraph."""
    yield from doc.paragraphs
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                yield from cell.paragraphs


def _replace_red_runs(paragraph, replacements):
    """Overwrite each red run in *paragraph* with its best replacement."""
    for run in paragraph.runs:
        if not is_red_font(run):
            continue
        new_text = find_best_match(run.text.strip(), replacements)
        if new_text:
            run.text = new_text
            run.font.color.rgb = RGBColor(0, 0, 0)  # Set to black


def replace_red_text_in_doc(doc_path, replacements):
    """Replace red runs in a Word document and save the result to a new file.

    Every run for which ``is_red_font`` is true (in body paragraphs and in
    table cells) is fuzzy-matched against *replacements*. When a match is
    found the run's text is replaced with it and its color is set to black;
    runs without a match are left untouched.

    Args:
        doc_path: Path of the ``.docx`` file to read.
        replacements: List of candidate replacement strings.

    Returns:
        Path of the updated document, ``updated.docx`` inside a freshly
        created temporary directory. The directory is not removed here.
    """
    doc = Document(doc_path)
    for paragraph in _iter_paragraphs(doc):
        _replace_red_runs(paragraph, replacements)

    updated_path = os.path.join(tempfile.mkdtemp(), "updated.docx")
    doc.save(updated_path)
    return updated_path


def _upload_path(upload):
    """Return the filesystem path of an upload.

    Accepts either a path (``str`` / ``os.PathLike``) or a file-like object
    exposing ``.name``, so the handler works with both styles of upload value.
    """
    if isinstance(upload, (str, os.PathLike)):
        return upload
    return upload.name


def _as_candidates(pdf_text):
    """Turn extracted PDF text into a collection of match candidates.

    NOTE(review): the return type of ``extract_text_pdf_raw`` is not visible
    in this file. If it is a single string, it is split into non-empty,
    stripped lines so that fuzzy matching compares whole lines rather than
    single characters. Any other value is passed through unchanged.
    """
    if isinstance(pdf_text, str):
        return [line.strip() for line in pdf_text.splitlines() if line.strip()]
    return pdf_text


def process_files(pdf_file, word_file):
    """Gradio handler: build the updated Word document from the two uploads.

    Steps:
        1. Extract text from the PDF and the red text from the Word file.
        2. For each distinct red value, find its closest PDF candidate.
        3. Rewrite the Word file's red runs using those candidates.

    Args:
        pdf_file: Uploaded PDF (path or object with ``.name``).
        word_file: Uploaded Word document (path or object with ``.name``).

    Returns:
        Path of the updated ``.docx`` file.
    """
    pdf_path = _upload_path(pdf_file)
    word_path = _upload_path(word_file)

    candidates = _as_candidates(extract_text_pdf_raw(pdf_path))
    word_data = extract_red_text_with_labels(word_path)

    # Flatten the red text entries and dedupe them; dict.fromkeys keeps
    # first-seen order so runs are reproducible.
    red_values = list(dict.fromkeys(chain.from_iterable(word_data.values())))

    # Match red values to PDF content, dropping values with no close match.
    best_matches = (find_best_match(value, candidates) for value in red_values)
    replacements = [match for match in best_matches if match]

    # Replace in Word
    return replace_red_text_in_doc(word_path, replacements)


# NOTE(review): type="file" is kept as in the original; confirm it is accepted
# by the installed Gradio version (newer releases document "filepath" and
# "binary" as the valid values).
demo = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(label="Upload PDF File", type="file"),
        gr.File(label="Upload Word File", type="file"),
    ],
    outputs=gr.File(label="Download Updated Word File"),
    title="Red Text Replacer",
    description=(
        "Upload a PDF and Word document. "
        "Red-colored text in the Word doc will be replaced by matching "
        "content from the PDF."
    ),
)
demo.launch()