Spaces:

prathameshv07
/

Multilingual-Audio-Intelligence-System

Running

App Files Community

Prathamesh Sarjerao Vaidya committed on Aug 18

Commit

6512a42

1 Parent(s): 6a90a55

fix mermaid & margin issue, & modularized the workflow

Browse files

Files changed (9) hide show

.github/workflows/check.yml +5 -667
.github/workflows/main.yml +5 -668
.github/workflows/puppeteer-config.json +3 -0
.github/workflows/scripts/convert_md_to_pdf.sh +112 -0
.github/workflows/scripts/latex-header.tex +42 -0
.github/workflows/scripts/preprocess_markdown.py +165 -0
.github/workflows/scripts/setup_system.sh +38 -0
.github/workflows/scripts/styles.css +109 -0
.github/workflows/scripts/upload_to_drive.py +135 -0

.github/workflows/check.yml CHANGED Viewed

@@ -25,543 +25,22 @@ jobs:
         with:
           lfs: true
-      # Pull LFS files
       - name: Pull LFS files
         run: |
           git lfs install
           git lfs pull
-      # Setup Python
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
           python-version: '3.11'
-      # Install system dependencies
-      - name: Install system dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y \
-            texlive-full \
-            texlive-xetex \
-            texlive-luatex \
-            pandoc \
-            librsvg2-bin \
-            python3-pip \
-            nodejs \
-            npm \
-            imagemagick \
-            ghostscript \
-            wkhtmltopdf
-      # Install Node.js dependencies for Mermaid
-      - name: Install Node.js dependencies for Mermaid
-        run: |
-          npm install -g @mermaid-js/mermaid-cli
-          npm install -g puppeteer
-          # Set up chrome for mermaid-cli in GitHub Actions
-          sudo apt-get install -y google-chrome-stable
-      # Install Python dependencies
-      - name: Install Python dependencies
-        run: |
-          pip install --upgrade pip
-          pip install \
-            weasyprint \
-            markdown \
-            pymdown-extensions \
-            pillow \
-            cairosvg \
-            pdfkit \
-            google-auth \
-            google-auth-oauthlib \
-            google-auth-httplib2 \
-            google-api-python-client
-      # Create LaTeX header for better image handling
-      - name: Create LaTeX header for better image handling
-        run: |
-          cat > latex-header.tex << 'EOF'
-          \usepackage{graphicx}
-          \usepackage{float}
-          \usepackage{adjustbox}
-          \usepackage{caption}
-          \usepackage{subcaption}
-          \usepackage{geometry}
-          \usepackage{fancyhdr}
-          \usepackage{xcolor}
-          \usepackage{hyperref}
-          \usepackage{fontspec}
-          \usepackage{unicode-math}
-          % Set fonts with emoji support
-          \setmainfont{DejaVu Sans}
-          \setsansfont{DejaVu Sans}
-          \setmonofont{DejaVu Sans Mono}
-          % Try to set a font with emoji support as fallback
-          \newfontfamily\emojifont{Apple Color Emoji}[Renderer=Harfbuzz]
-          % Better image positioning and scaling
-          \floatplacement{figure}{H}
-          \renewcommand{\includegraphics}[2][]{\adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}}
-          \let\oldincludegraphics\includegraphics
-          % Set margins
-          \geometry{margin=1in}
-          % Hyperlink colors
-          \hypersetup{
-              colorlinks=true,
-              linkcolor=blue,
-              urlcolor=blue,
-              citecolor=blue
-          }
-          EOF
-      # Create enhanced CSS for HTML conversion
-      - name: Create enhanced CSS for HTML conversion
-        run: |
-          cat > styles.css << 'EOF'
-          body {
-              font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-              max-width: 210mm;
-              margin: 0 auto;
-              padding: 20mm;
-              line-height: 1.6;
-              color: #333;
-              background: white;
-          }
-          img {
-              max-width: 100%;
-              height: auto;
-              display: block;
-              margin: 1em auto;
-              border-radius: 4px;
-              box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-          }
-          pre {
-              background: #f8f9fa;
-              padding: 1em;
-              border-radius: 6px;
-              border-left: 4px solid #007acc;
-              overflow-x: auto;
-              font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
-              font-size: 0.9em;
-          }
-          code {
-              background: #f1f3f4;
-              padding: 0.2em 0.4em;
-              border-radius: 3px;
-              font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
-              font-size: 0.9em;
-          }
-          h1, h2, h3, h4, h5, h6 {
-              color: #2c3e50;
-              margin-top: 2em;
-              margin-bottom: 1em;
-              page-break-after: avoid;
-          }
-          h1 {
-              border-bottom: 3px solid #3498db;
-              padding-bottom: 0.5em;
-          }
-          h2 {
-              border-bottom: 2px solid #95a5a6;
-              padding-bottom: 0.3em;
-          }
-          table {
-              border-collapse: collapse;
-              width: 100%;
-              margin: 1em 0;
-          }
-          th, td {
-              border: 1px solid #ddd;
-              padding: 0.75em;
-              text-align: left;
-          }
-          th {
-              background-color: #f8f9fa;
-              font-weight: bold;
-          }
-          blockquote {
-              border-left: 4px solid #3498db;
-              margin: 1em 0;
-              padding: 0.5em 1em;
-              background: #f8f9fa;
-              border-radius: 0 4px 4px 0;
-          }
-          .mermaid-container {
-              text-align: center;
-              margin: 2em 0;
-              page-break-inside: avoid;
-          }
-          .mermaid-container img {
-              max-width: 100%;
-              height: auto;
-          }
-          @media print {
-              body {
-                  margin: 0;
-                  padding: 15mm;
-              }
-              img {
-                  max-height: 80vh;
-                  page-break-inside: avoid;
-              }
-              h1, h2, h3, h4, h5, h6 {
-                  page-break-after: avoid;
-              }
-              pre, blockquote {
-                  page-break-inside: avoid;
-              }
-          }
-          EOF
-      # Fixed preprocessing script with no-sandbox mermaid
-      - name: Create preprocessing script
-        run: |
-          cat > preprocess_markdown.py << 'EOF'
-          #!/usr/bin/env python3
-          import re
-          import os
-          import sys
-          import subprocess
-          from pathlib import Path
-          def process_mermaid_diagrams(content, file_dir):
-              """Convert mermaid diagrams to images"""
-              mermaid_pattern = r'```mermaid\n(.*?)\n```'
-              def replace_mermaid(match):
-                  mermaid_code = match.group(1)
-                  # Create a unique filename for this diagram
-                  diagram_hash = str(abs(hash(mermaid_code)))
-                  mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
-                  svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
-                  png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
-                  # Write mermaid code to file
-                  try:
-                      with open(mermaid_file, 'w', encoding='utf-8') as f:
-                          f.write(mermaid_code)
-                  except Exception as e:
-                      print(f"Error writing mermaid file: {e}")
-                      return f'\n```\n{mermaid_code}\n```\n'
-                  try:
-                      # Convert to SVG first with no-sandbox flags
-                      result = subprocess.run([
-                          'mmdc', '-i', mermaid_file, '-o', svg_file,
-                          '--theme', 'default', '--backgroundColor', 'white',
-                          '--puppeteerConfig', '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]}'
-                      ], check=True, capture_output=True, text=True)
-                      # Convert SVG to PNG for better PDF compatibility
-                      subprocess.run([
-                          'rsvg-convert', '-f', 'png', '-o', png_file,
-                          '--width', '1200', '--height', '800', svg_file
-                      ], check=True, capture_output=True, text=True)
-                      # Clean up intermediate files
-                      try:
-                          os.remove(mermaid_file)
-                          if os.path.exists(svg_file):
-                              os.remove(svg_file)
-                      except:
-                          pass
-                      # Return markdown image syntax
-                      return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
-                  except subprocess.CalledProcessError as e:
-                      print(f"Error converting mermaid diagram: {e}")
-                      print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
-                      # Clean up files on error
-                      try:
-                          os.remove(mermaid_file)
-                      except:
-                          pass
-                      return f'\n```\n{mermaid_code}\n```\n'
-                  except Exception as e:
-                      print(f"Unexpected error with mermaid: {e}")
-                      try:
-                          os.remove(mermaid_file)
-                      except:
-                          pass
-                      return f'\n```\n{mermaid_code}\n```\n'
-              return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
-          def clean_emojis_and_fix_images(content, file_dir):
-              """Remove/replace emojis and fix image paths"""
-              # Remove or replace problematic emojis that cause LaTeX issues
-              emoji_replacements = {
-                  '🎵': '[Audio]',
-                  '🎬': '[Video]',
-                  '📝': '[Document]',
-                  '📊': '[Analytics]',
-                  '🧠': '[AI]',
-                  '🎥': '[Media]',
-                  '📄': '[File]'
-              }
-              for emoji, replacement in emoji_replacements.items():
-                  content = content.replace(emoji, replacement)
-              # Pattern to match markdown images
-              img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
-              def replace_image(match):
-                  alt_text = match.group(1)
-                  img_path = match.group(2)
-                  # Handle relative paths
-                  if not img_path.startswith(('http://', 'https://', '/')):
-                      # Make path relative to the markdown file
-                      abs_img_path = os.path.join(file_dir, img_path)
-                      if os.path.exists(abs_img_path):
-                          img_path = os.path.relpath(abs_img_path, file_dir)
-                  # Add HTML img tag with better control
-                  return f'<img src="{img_path}" alt="{alt_text}" style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
-              content = re.sub(img_pattern, replace_image, content)
-              # Fix existing HTML img tags
-              content = re.sub(
-                  r'<img\s+([^>]*?)\s*/?>',
-                  lambda m: f'<img {m.group(1)} style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />',
-                  content
-              )
-              return content
-          def main():
-              if len(sys.argv) != 2:
-                  print("Usage: python preprocess_markdown.py <markdown_file>")
-                  sys.exit(1)
-              md_file = sys.argv[1]
-              if not os.path.exists(md_file):
-                  print(f"Error: File {md_file} does not exist")
-                  sys.exit(1)
-              try:
-                  file_dir = os.path.dirname(os.path.abspath(md_file))
-                  with open(md_file, 'r', encoding='utf-8') as f:
-                      content = f.read()
-                  print(f"Processing file: {md_file}")
-                  print(f"File directory: {file_dir}")
-                  print(f"Content length: {len(content)} characters")
-                  # Process mermaid diagrams
-                  content = process_mermaid_diagrams(content, file_dir)
-                  print(f"Mermaid processing complete. Content length: {len(content)}")
-                  # Clean emojis and fix image paths
-                  content = clean_emojis_and_fix_images(content, file_dir)
-                  print(f"Image path fixing complete. Content length: {len(content)}")
-                  # Write processed content
-                  processed_file = md_file.replace('.md', '_processed.md')
-                  with open(processed_file, 'w', encoding='utf-8') as f:
-                      f.write(content)
-                  print(f"Processed file saved as: {processed_file}")
-                  print(processed_file)
-              except Exception as e:
-                  print(f"Error processing {md_file}: {e}")
-                  import traceback
-                  traceback.print_exc()
-                  sys.exit(1)
-          if __name__ == "__main__":
-              main()
-          EOF
-          chmod +x preprocess_markdown.py
-      # Convert MD to PDF with enhanced processing
-      - name: Convert MD to PDF with enhanced processing
-        run: |
-          find . -name "*.md" -not -path "./.git/*" | while read file; do
-            # Get the directory and filename
-            dir="$(dirname "$file")"
-            filename="$(basename "$file" .md)"
-            pdf_path="$dir/$filename.pdf"
-            echo "Processing $file..."
-            echo "Directory: $dir"
-            echo "Filename: $filename"
-            echo "PDF path: $pdf_path"
-            # Check if file exists and is readable
-            if [ ! -f "$file" ]; then
-              echo "ERROR: File $file does not exist"
-              continue
-            fi
-            if [ ! -r "$file" ]; then
-              echo "ERROR: File $file is not readable"
-              continue
-            fi
-            # Show file info for debugging
-            echo "File size: $(wc -c < "$file") bytes"
-            echo "File permissions: $(ls -la "$file")"
-            # Preprocess the markdown file
-            cd "$dir"
-            echo "Changed to directory: $(pwd)"
-            echo "Running preprocessing script..."
-            # Debug: Check if preprocessing script exists and is executable
-            if [ ! -f "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
-              echo "ERROR: Preprocessing script not found at $GITHUB_WORKSPACE/preprocess_markdown.py"
-              processed_file="$(basename "$file")"
-            elif [ ! -x "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
-              echo "WARNING: Preprocessing script is not executable, trying anyway..."
-              processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
-              if [ $? -ne 0 ]; then
-                echo "Preprocessing failed with output: $processed_file"
-                processed_file="$(basename "$file")"
-              else
-                echo "Preprocessing succeeded: $processed_file"
-              fi
-            else
-              processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
-              exit_code=$?
-              echo "Preprocessing exit code: $exit_code"
-              echo "Preprocessing output: $processed_file"
-              if [ $exit_code -ne 0 ]; then
-                echo "Preprocessing failed, using original file"
-                processed_file="$(basename "$file")"
-              fi
-            fi
-            # Verify processed file exists
-            if [ ! -f "$processed_file" ]; then
-              echo "Processed file $processed_file does not exist, using original"
-              processed_file="$(basename "$file")"
-            fi
-            echo "Using file for conversion: $processed_file"
-            # Check if pandoc is available
-            if ! command -v pandoc &> /dev/null; then
-              echo "ERROR: pandoc is not installed or not in PATH"
-              continue
-            fi
-            echo "Converting $processed_file to $pdf_path"
-            # Method 1: Try XeLaTeX with enhanced settings
-            pandoc "$processed_file" \
-              -o "$pdf_path" \
-              --pdf-engine=xelatex \
-              --include-in-header="$GITHUB_WORKSPACE/latex-header.tex" \
-              --variable mainfont="DejaVu Sans" \
-              --variable sansfont="DejaVu Sans" \
-              --variable monofont="DejaVu Sans Mono" \
-              --variable geometry:margin=1in \
-              --variable colorlinks=true \
-              --variable linkcolor=blue \
-              --variable urlcolor=blue \
-              --variable toccolor=gray \
-              --resource-path="$dir:$GITHUB_WORKSPACE" \
-              --standalone \
-              --toc \
-              --number-sections \
-              --highlight-style=pygments \
-              --wrap=auto \
-              --dpi=300 \
-              --verbose 2>&1 || {
-              echo "XeLaTeX failed, trying HTML->PDF conversion..."
-              # Method 2: HTML to PDF conversion with WeasyPrint
-              pandoc "$processed_file" \
-                -t html5 \
-                --standalone \
-                --embed-resources \
-                --css="$GITHUB_WORKSPACE/styles.css" \
-                --toc \
-                --number-sections \
-                --highlight-style=pygments \
-                -o "$dir/$filename.html" 2>&1
-              if [ -f "$dir/$filename.html" ]; then
-                echo "HTML file created, attempting WeasyPrint conversion..."
-                weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
-                  echo "WeasyPrint failed, trying wkhtmltopdf..."
-                  # Method 3: wkhtmltopdf as final fallback
-                  wkhtmltopdf \
-                    --page-size A4 \
-                    --margin-top 0.75in \
-                    --margin-right 0.75in \
-                    --margin-bottom 0.75in \
-                    --margin-left 0.75in \
-                    --encoding UTF-8 \
-                    --no-outline \
-                    --enable-local-file-access \
-                    "$dir/$filename.html" "$pdf_path" 2>&1 || {
-                    echo "All conversion methods failed for $file"
-                    continue
-                  }
-                }
-                # Clean up HTML file
-                rm -f "$dir/$filename.html"
-              else
-                echo "Failed to create HTML file for $file"
-                continue
-              fi
-            }
-            # Clean up processed file if it's different from original
-            if [ "$processed_file" != "$(basename "$file")" ]; then
-              rm -f "$processed_file"
-              echo "Cleaned up processed file: $processed_file"
-            fi
-            # Clean up generated mermaid images
-            rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
-            if [ -f "$pdf_path" ]; then
-              echo "✅ Successfully converted $file to $pdf_path"
-              echo "PDF file size: $(wc -c < "$pdf_path") bytes"
-            else
-              echo "❌ Failed to convert $file"
-            fi
-            # Return to original directory
-            cd "$GITHUB_WORKSPACE"
-          done
-      # Upload PDF artifacts
       - name: Upload PDF artifacts
         uses: actions/upload-artifact@v4
         with:
@@ -569,148 +48,7 @@ jobs:
           path: "**/*.pdf"
           retention-days: 30
-      # Upload to Google Drive
       - name: Upload to Google Drive
         env:
           GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
-        run: |
-          cat > upload_to_drive.py << 'EOF'
-          import os
-          import json
-          from google.oauth2.credentials import Credentials
-          from google.auth.transport.requests import Request
-          from googleapiclient.discovery import build
-          from googleapiclient.http import MediaFileUpload
-          import mimetypes
-          # Load OAuth credentials from environment
-          oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
-          token_info = json.loads(oauth_token_json)
-          # Create credentials from the token info
-          credentials = Credentials.from_authorized_user_info(token_info)
-          # Refresh the token if needed
-          if credentials.expired and credentials.refresh_token:
-              credentials.refresh(Request())
-          # Build the Drive service
-          service = build('drive', 'v3', credentials=credentials)
-          # Target folder ID - This is where files will be uploaded
-          FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
-          def get_mime_type(file_path):
-              mime_type, _ = mimetypes.guess_type(file_path)
-              return mime_type or 'application/octet-stream'
-          def upload_file(file_path, parent_folder_id, drive_service):
-              file_name = os.path.basename(file_path)
-              # Check if file already exists in the specific folder
-              query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
-              results = drive_service.files().list(q=query).execute()
-              items = results.get('files', [])
-              media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
-              if items:
-                  # Update existing file
-                  file_id = items[0]['id']
-                  updated_file = drive_service.files().update(
-                      fileId=file_id,
-                      media_body=media
-                  ).execute()
-                  print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
-              else:
-                  # Create new file
-                  file_metadata = {
-                      'name': file_name,
-                      'parents': [parent_folder_id]
-                  }
-                  file = drive_service.files().create(
-                      body=file_metadata,
-                      media_body=media,
-                      fields='id'
-                  ).execute()
-                  print(f'Uploaded: {file_name} (ID: {file.get("id")})')
-          def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
-              """Create a folder if it doesn't exist and return its ID"""
-              # Check if folder already exists
-              query = f"name='{folder_name}' and '{parent_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false"
-              results = drive_service.files().list(q=query).execute()
-              items = results.get('files', [])
-              if items:
-                  return items[0]['id']
-              else:
-                  # Create new folder
-                  folder_metadata = {
-                      'name': folder_name,
-                      'parents': [parent_folder_id],
-                      'mimeType': 'application/vnd.google-apps.folder'
-                  }
-                  folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
-                  print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
-                  return folder.get('id')
-          def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
-              if exclude_dirs is None:
-                  exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
-              if exclude_files is None:
-                  exclude_files = ['*.md']  # Skip markdown files
-              import fnmatch
-              for root, dirs, files in os.walk(local_path):
-                  # Remove excluded directories
-                  dirs[:] = [d for d in dirs if d not in exclude_dirs]
-                  # Calculate relative path from the root
-                  rel_path = os.path.relpath(root, local_path)
-                  current_folder_id = parent_folder_id
-                  # Create nested folders if needed
-                  if rel_path != '.':
-                      path_parts = rel_path.split(os.sep)
-                      for part in path_parts:
-                          current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
-                  # Upload files in current directory
-                  for file in files:
-                      # Skip excluded file patterns (like *.md)
-                      should_skip = False
-                      for pattern in exclude_files:
-                          if fnmatch.fnmatch(file, pattern):
-                              should_skip = True
-                              break
-                      if should_skip:
-                          print(f'Skipping {file} (excluded file type)')
-                          continue
-                      file_path = os.path.join(root, file)
-                      try:
-                          upload_file(file_path, current_folder_id, drive_service)
-                      except Exception as e:
-                          print(f'Error uploading {file_path}: {e}')
-          # Test folder permissions first
-          try:
-              # Try to list files in the target folder to verify access
-              test_query = f"'{FOLDER_ID}' in parents and trashed=false"
-              test_results = service.files().list(q=test_query, pageSize=1).execute()
-              print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
-          except Exception as e:
-              print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
-              exit(1)
-          # Upload all files to Google Drive (excluding MD files)
-          print("Starting upload to Google Drive...")
-          upload_directory('.', FOLDER_ID, service)
-          print("Upload completed - MD files were skipped, PDFs were uploaded!")
-          EOF
-          python upload_to_drive.py

         with:
           lfs: true
       - name: Pull LFS files
         run: |
           git lfs install
           git lfs pull
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
           python-version: '3.11'
+      - name: Setup system dependencies
+        run: chmod +x .github/workflows/scripts/setup_system.sh && .github/workflows/scripts/setup_system.sh
+      - name: Convert MD to PDF
+        run: chmod +x .github/workflows/scripts/convert_md_to_pdf.sh && .github/workflows/scripts/convert_md_to_pdf.sh
       - name: Upload PDF artifacts
         uses: actions/upload-artifact@v4
         with:
           path: "**/*.pdf"
           retention-days: 30
       - name: Upload to Google Drive
         env:
           GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
+        run: python .github/workflows/scripts/upload_to_drive.py

.github/workflows/main.yml CHANGED Viewed

@@ -13,543 +13,22 @@ jobs:
           fetch-depth: 0
           lfs: true
-      # Ensure Git LFS is installed and fetch binary files
       - name: Pull LFS files
         run: |
           git lfs install
           git lfs pull
-      # Setup Python
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
           python-version: '3.11'
-      # Install system dependencies
-      - name: Install system dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y \
-            texlive-full \
-            texlive-xetex \
-            texlive-luatex \
-            pandoc \
-            librsvg2-bin \
-            python3-pip \
-            nodejs \
-            npm \
-            imagemagick \
-            ghostscript \
-            wkhtmltopdf
-      # Install Node.js dependencies for Mermaid
-      - name: Install Node.js dependencies for Mermaid
-        run: |
-          npm install -g @mermaid-js/mermaid-cli
-          npm install -g puppeteer
-          # Set up chrome for mermaid-cli in GitHub Actions
-          sudo apt-get install -y google-chrome-stable
-      # Install Python dependencies
-      - name: Install Python dependencies
-        run: |
-          pip install --upgrade pip
-          pip install \
-            weasyprint \
-            markdown \
-            pymdown-extensions \
-            pillow \
-            cairosvg \
-            pdfkit \
-            google-auth \
-            google-auth-oauthlib \
-            google-auth-httplib2 \
-            google-api-python-client
-      # Create LaTeX header for better image handling
-      - name: Create LaTeX header for better image handling
-        run: |
-          cat > latex-header.tex << 'EOF'
-          \usepackage{graphicx}
-          \usepackage{float}
-          \usepackage{adjustbox}
-          \usepackage{caption}
-          \usepackage{subcaption}
-          \usepackage{geometry}
-          \usepackage{fancyhdr}
-          \usepackage{xcolor}
-          \usepackage{hyperref}
-          \usepackage{fontspec}
-          \usepackage{unicode-math}
-          % Set fonts with emoji support
-          \setmainfont{DejaVu Sans}
-          \setsansfont{DejaVu Sans}
-          \setmonofont{DejaVu Sans Mono}
-          % Try to set a font with emoji support as fallback
-          \newfontfamily\emojifont{Apple Color Emoji}[Renderer=Harfbuzz]
-          % Better image positioning and scaling
-          \floatplacement{figure}{H}
-          \renewcommand{\includegraphics}[2][]{\adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}}
-          \let\oldincludegraphics\includegraphics
-          % Set margins
-          \geometry{margin=1in}
-          % Hyperlink colors
-          \hypersetup{
-              colorlinks=true,
-              linkcolor=blue,
-              urlcolor=blue,
-              citecolor=blue
-          }
-          EOF
-      # Create enhanced CSS for HTML conversion
-      - name: Create enhanced CSS for HTML conversion
-        run: |
-          cat > styles.css << 'EOF'
-          body {
-              font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-              max-width: 210mm;
-              margin: 0 auto;
-              padding: 20mm;
-              line-height: 1.6;
-              color: #333;
-              background: white;
-          }
-          img {
-              max-width: 100%;
-              height: auto;
-              display: block;
-              margin: 1em auto;
-              border-radius: 4px;
-              box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-          }
-          pre {
-              background: #f8f9fa;
-              padding: 1em;
-              border-radius: 6px;
-              border-left: 4px solid #007acc;
-              overflow-x: auto;
-              font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
-              font-size: 0.9em;
-          }
-          code {
-              background: #f1f3f4;
-              padding: 0.2em 0.4em;
-              border-radius: 3px;
-              font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
-              font-size: 0.9em;
-          }
-          h1, h2, h3, h4, h5, h6 {
-              color: #2c3e50;
-              margin-top: 2em;
-              margin-bottom: 1em;
-              page-break-after: avoid;
-          }
-          h1 {
-              border-bottom: 3px solid #3498db;
-              padding-bottom: 0.5em;
-          }
-          h2 {
-              border-bottom: 2px solid #95a5a6;
-              padding-bottom: 0.3em;
-          }
-          table {
-              border-collapse: collapse;
-              width: 100%;
-              margin: 1em 0;
-          }
-          th, td {
-              border: 1px solid #ddd;
-              padding: 0.75em;
-              text-align: left;
-          }
-          th {
-              background-color: #f8f9fa;
-              font-weight: bold;
-          }
-          blockquote {
-              border-left: 4px solid #3498db;
-              margin: 1em 0;
-              padding: 0.5em 1em;
-              background: #f8f9fa;
-              border-radius: 0 4px 4px 0;
-          }
-          .mermaid-container {
-              text-align: center;
-              margin: 2em 0;
-              page-break-inside: avoid;
-          }
-          .mermaid-container img {
-              max-width: 100%;
-              height: auto;
-          }
-          @media print {
-              body {
-                  margin: 0;
-                  padding: 15mm;
-              }
-              img {
-                  max-height: 80vh;
-                  page-break-inside: avoid;
-              }
-              h1, h2, h3, h4, h5, h6 {
-                  page-break-after: avoid;
-              }
-              pre, blockquote {
-                  page-break-inside: avoid;
-              }
-          }
-          EOF
-      # Fixed preprocessing script with no-sandbox mermaid
-      - name: Create preprocessing script
-        run: |
-          cat > preprocess_markdown.py << 'EOF'
-          #!/usr/bin/env python3
-          import re
-          import os
-          import sys
-          import subprocess
-          from pathlib import Path
-          def process_mermaid_diagrams(content, file_dir):
-              """Convert mermaid diagrams to images"""
-              mermaid_pattern = r'```mermaid\n(.*?)\n```'
-              def replace_mermaid(match):
-                  mermaid_code = match.group(1)
-                  # Create a unique filename for this diagram
-                  diagram_hash = str(abs(hash(mermaid_code)))
-                  mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
-                  svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
-                  png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
-                  # Write mermaid code to file
-                  try:
-                      with open(mermaid_file, 'w', encoding='utf-8') as f:
-                          f.write(mermaid_code)
-                  except Exception as e:
-                      print(f"Error writing mermaid file: {e}")
-                      return f'\n```\n{mermaid_code}\n```\n'
-                  try:
-                      # Convert to SVG first with no-sandbox flags
-                      result = subprocess.run([
-                          'mmdc', '-i', mermaid_file, '-o', svg_file,
-                          '--theme', 'default', '--backgroundColor', 'white',
-                          '--puppeteerConfig', '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]}'
-                      ], check=True, capture_output=True, text=True)
-                      # Convert SVG to PNG for better PDF compatibility
-                      subprocess.run([
-                          'rsvg-convert', '-f', 'png', '-o', png_file,
-                          '--width', '1200', '--height', '800', svg_file
-                      ], check=True, capture_output=True, text=True)
-                      # Clean up intermediate files
-                      try:
-                          os.remove(mermaid_file)
-                          if os.path.exists(svg_file):
-                              os.remove(svg_file)
-                      except:
-                          pass
-                      # Return markdown image syntax
-                      return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
-                  except subprocess.CalledProcessError as e:
-                      print(f"Error converting mermaid diagram: {e}")
-                      print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
-                      # Clean up files on error
-                      try:
-                          os.remove(mermaid_file)
-                      except:
-                          pass
-                      return f'\n```\n{mermaid_code}\n```\n'
-                  except Exception as e:
-                      print(f"Unexpected error with mermaid: {e}")
-                      try:
-                          os.remove(mermaid_file)
-                      except:
-                          pass
-                      return f'\n```\n{mermaid_code}\n```\n'
-              return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
-          def clean_emojis_and_fix_images(content, file_dir):
-              """Remove/replace emojis and fix image paths"""
-              # Remove or replace problematic emojis that cause LaTeX issues
-              emoji_replacements = {
-                  '🎵': '[Audio]',
-                  '🎬': '[Video]',
-                  '📝': '[Document]',
-                  '📊': '[Analytics]',
-                  '🧠': '[AI]',
-                  '🎥': '[Media]',
-                  '📄': '[File]'
-              }
-              for emoji, replacement in emoji_replacements.items():
-                  content = content.replace(emoji, replacement)
-              # Pattern to match markdown images
-              img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
-              def replace_image(match):
-                  alt_text = match.group(1)
-                  img_path = match.group(2)
-                  # Handle relative paths
-                  if not img_path.startswith(('http://', 'https://', '/')):
-                      # Make path relative to the markdown file
-                      abs_img_path = os.path.join(file_dir, img_path)
-                      if os.path.exists(abs_img_path):
-                          img_path = os.path.relpath(abs_img_path, file_dir)
-                  # Add HTML img tag with better control
-                  return f'<img src="{img_path}" alt="{alt_text}" style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
-              content = re.sub(img_pattern, replace_image, content)
-              # Fix existing HTML img tags
-              content = re.sub(
-                  r'<img\s+([^>]*?)\s*/?>',
-                  lambda m: f'<img {m.group(1)} style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />',
-                  content
-              )
-              return content
-          def main():
-              if len(sys.argv) != 2:
-                  print("Usage: python preprocess_markdown.py <markdown_file>")
-                  sys.exit(1)
-              md_file = sys.argv[1]
-              if not os.path.exists(md_file):
-                  print(f"Error: File {md_file} does not exist")
-                  sys.exit(1)
-              try:
-                  file_dir = os.path.dirname(os.path.abspath(md_file))
-                  with open(md_file, 'r', encoding='utf-8') as f:
-                      content = f.read()
-                  print(f"Processing file: {md_file}")
-                  print(f"File directory: {file_dir}")
-                  print(f"Content length: {len(content)} characters")
-                  # Process mermaid diagrams
-                  content = process_mermaid_diagrams(content, file_dir)
-                  print(f"Mermaid processing complete. Content length: {len(content)}")
-                  # Clean emojis and fix image paths
-                  content = clean_emojis_and_fix_images(content, file_dir)
-                  print(f"Image path fixing complete. Content length: {len(content)}")
-                  # Write processed content
-                  processed_file = md_file.replace('.md', '_processed.md')
-                  with open(processed_file, 'w', encoding='utf-8') as f:
-                      f.write(content)
-                  print(f"Processed file saved as: {processed_file}")
-                  print(processed_file)
-              except Exception as e:
-                  print(f"Error processing {md_file}: {e}")
-                  import traceback
-                  traceback.print_exc()
-                  sys.exit(1)
-          if __name__ == "__main__":
-              main()
-          EOF
-          chmod +x preprocess_markdown.py
-      # Convert MD to PDF with enhanced processing
-      - name: Convert MD to PDF with enhanced processing
-        run: |
-          find . -name "*.md" -not -path "./.git/*" | while read file; do
-            # Get the directory and filename
-            dir="$(dirname "$file")"
-            filename="$(basename "$file" .md)"
-            pdf_path="$dir/$filename.pdf"
-            echo "Processing $file..."
-            echo "Directory: $dir"
-            echo "Filename: $filename"
-            echo "PDF path: $pdf_path"
-            # Check if file exists and is readable
-            if [ ! -f "$file" ]; then
-              echo "ERROR: File $file does not exist"
-              continue
-            fi
-            if [ ! -r "$file" ]; then
-              echo "ERROR: File $file is not readable"
-              continue
-            fi
-            # Show file info for debugging
-            echo "File size: $(wc -c < "$file") bytes"
-            echo "File permissions: $(ls -la "$file")"
-            # Preprocess the markdown file
-            cd "$dir"
-            echo "Changed to directory: $(pwd)"
-            echo "Running preprocessing script..."
-            # Debug: Check if preprocessing script exists and is executable
-            if [ ! -f "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
-              echo "ERROR: Preprocessing script not found at $GITHUB_WORKSPACE/preprocess_markdown.py"
-              processed_file="$(basename "$file")"
-            elif [ ! -x "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
-              echo "WARNING: Preprocessing script is not executable, trying anyway..."
-              processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
-              if [ $? -ne 0 ]; then
-                echo "Preprocessing failed with output: $processed_file"
-                processed_file="$(basename "$file")"
-              else
-                echo "Preprocessing succeeded: $processed_file"
-              fi
-            else
-              processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
-              exit_code=$?
-              echo "Preprocessing exit code: $exit_code"
-              echo "Preprocessing output: $processed_file"
-              if [ $exit_code -ne 0 ]; then
-                echo "Preprocessing failed, using original file"
-                processed_file="$(basename "$file")"
-              fi
-            fi
-            # Verify processed file exists
-            if [ ! -f "$processed_file" ]; then
-              echo "Processed file $processed_file does not exist, using original"
-              processed_file="$(basename "$file")"
-            fi
-            echo "Using file for conversion: $processed_file"
-            # Check if pandoc is available
-            if ! command -v pandoc &> /dev/null; then
-              echo "ERROR: pandoc is not installed or not in PATH"
-              continue
-            fi
-            echo "Converting $processed_file to $pdf_path"
-            # Method 1: Try XeLaTeX with enhanced settings
-            pandoc "$processed_file" \
-              -o "$pdf_path" \
-              --pdf-engine=xelatex \
-              --include-in-header="$GITHUB_WORKSPACE/latex-header.tex" \
-              --variable mainfont="DejaVu Sans" \
-              --variable sansfont="DejaVu Sans" \
-              --variable monofont="DejaVu Sans Mono" \
-              --variable geometry:margin=1in \
-              --variable colorlinks=true \
-              --variable linkcolor=blue \
-              --variable urlcolor=blue \
-              --variable toccolor=gray \
-              --resource-path="$dir:$GITHUB_WORKSPACE" \
-              --standalone \
-              --toc \
-              --number-sections \
-              --highlight-style=pygments \
-              --wrap=auto \
-              --dpi=300 \
-              --verbose 2>&1 || {
-              echo "XeLaTeX failed, trying HTML->PDF conversion..."
-              # Method 2: HTML to PDF conversion with WeasyPrint
-              pandoc "$processed_file" \
-                -t html5 \
-                --standalone \
-                --embed-resources \
-                --css="$GITHUB_WORKSPACE/styles.css" \
-                --toc \
-                --number-sections \
-                --highlight-style=pygments \
-                -o "$dir/$filename.html" 2>&1
-              if [ -f "$dir/$filename.html" ]; then
-                echo "HTML file created, attempting WeasyPrint conversion..."
-                weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
-                  echo "WeasyPrint failed, trying wkhtmltopdf..."
-                  # Method 3: wkhtmltopdf as final fallback
-                  wkhtmltopdf \
-                    --page-size A4 \
-                    --margin-top 0.75in \
-                    --margin-right 0.75in \
-                    --margin-bottom 0.75in \
-                    --margin-left 0.75in \
-                    --encoding UTF-8 \
-                    --no-outline \
-                    --enable-local-file-access \
-                    "$dir/$filename.html" "$pdf_path" 2>&1 || {
-                    echo "All conversion methods failed for $file"
-                    continue
-                  }
-                }
-                # Clean up HTML file
-                rm -f "$dir/$filename.html"
-              else
-                echo "Failed to create HTML file for $file"
-                continue
-              fi
-            }
-            # Clean up processed file if it's different from original
-            if [ "$processed_file" != "$(basename "$file")" ]; then
-              rm -f "$processed_file"
-              echo "Cleaned up processed file: $processed_file"
-            fi
-            # Clean up generated mermaid images
-            rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
-            if [ -f "$pdf_path" ]; then
-              echo "✅ Successfully converted $file to $pdf_path"
-              echo "PDF file size: $(wc -c < "$pdf_path") bytes"
-            else
-              echo "❌ Failed to convert $file"
-            fi
-            # Return to original directory
-            cd "$GITHUB_WORKSPACE"
-          done
-      # Upload PDF artifacts
       - name: Upload PDF artifacts
         uses: actions/upload-artifact@v4
         with:
@@ -557,153 +36,11 @@ jobs:
           path: "**/*.pdf"
           retention-days: 30
-      # Upload to Google Drive
       - name: Upload to Google Drive
         env:
           GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
-        run: |
-          cat > upload_to_drive.py << 'EOF'
-          import os
-          import json
-          from google.oauth2.credentials import Credentials
-          from google.auth.transport.requests import Request
-          from googleapiclient.discovery import build
-          from googleapiclient.http import MediaFileUpload
-          import mimetypes
-          # Load OAuth credentials from environment
-          oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
-          token_info = json.loads(oauth_token_json)
-          # Create credentials from the token info
-          credentials = Credentials.from_authorized_user_info(token_info)
-          # Refresh the token if needed
-          if credentials.expired and credentials.refresh_token:
-              credentials.refresh(Request())
-          # Build the Drive service
-          service = build('drive', 'v3', credentials=credentials)
-          # Target folder ID - This is where files will be uploaded
-          FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
-          def get_mime_type(file_path):
-              mime_type, _ = mimetypes.guess_type(file_path)
-              return mime_type or 'application/octet-stream'
-          def upload_file(file_path, parent_folder_id, drive_service):
-              file_name = os.path.basename(file_path)
-              # Check if file already exists in the specific folder
-              query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
-              results = drive_service.files().list(q=query).execute()
-              items = results.get('files', [])
-              media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
-              if items:
-                  # Update existing file
-                  file_id = items[0]['id']
-                  updated_file = drive_service.files().update(
-                      fileId=file_id,
-                      media_body=media
-                  ).execute()
-                  print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
-              else:
-                  # Create new file
-                  file_metadata = {
-                      'name': file_name,
-                      'parents': [parent_folder_id]
-                  }
-                  file = drive_service.files().create(
-                      body=file_metadata,
-                      media_body=media,
-                      fields='id'
-                  ).execute()
-                  print(f'Uploaded: {file_name} (ID: {file.get("id")})')
-          def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
-              """Create a folder if it doesn't exist and return its ID"""
-              # Check if folder already exists
-              query = f"name='{folder_name}' and '{parent_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false"
-              results = drive_service.files().list(q=query).execute()
-              items = results.get('files', [])
-              if items:
-                  return items[0]['id']
-              else:
-                  # Create new folder
-                  folder_metadata = {
-                      'name': folder_name,
-                      'parents': [parent_folder_id],
-                      'mimeType': 'application/vnd.google-apps.folder'
-                  }
-                  folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
-                  print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
-                  return folder.get('id')
-          def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
-              if exclude_dirs is None:
-                  exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
-              if exclude_files is None:
-                  exclude_files = ['*.md']  # Skip markdown files
-              import fnmatch
-              for root, dirs, files in os.walk(local_path):
-                  # Remove excluded directories
-                  dirs[:] = [d for d in dirs if d not in exclude_dirs]
-                  # Calculate relative path from the root
-                  rel_path = os.path.relpath(root, local_path)
-                  current_folder_id = parent_folder_id
-                  # Create nested folders if needed
-                  if rel_path != '.':
-                      path_parts = rel_path.split(os.sep)
-                      for part in path_parts:
-                          current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
-                  # Upload files in current directory
-                  for file in files:
-                      # Skip excluded file patterns (like *.md)
-                      should_skip = False
-                      for pattern in exclude_files:
-                          if fnmatch.fnmatch(file, pattern):
-                              should_skip = True
-                              break
-                      if should_skip:
-                          print(f'Skipping {file} (excluded file type)')
-                          continue
-                      file_path = os.path.join(root, file)
-                      try:
-                          upload_file(file_path, current_folder_id, drive_service)
-                      except Exception as e:
-                          print(f'Error uploading {file_path}: {e}')
-          # Test folder permissions first
-          try:
-              # Try to list files in the target folder to verify access
-              test_query = f"'{FOLDER_ID}' in parents and trashed=false"
-              test_results = service.files().list(q=test_query, pageSize=1).execute()
-              print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
-          except Exception as e:
-              print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
-              exit(1)
-          # Upload all files to Google Drive (excluding MD files)
-          print("Starting upload to Google Drive...")
-          upload_directory('.', FOLDER_ID, service)
-          print("Upload completed - MD files were skipped, PDFs were uploaded!")
-          EOF
-          python upload_to_drive.py
-      # Push to Hugging Face (original functionality)
       - name: Push to Hugging Face hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}

           fetch-depth: 0
           lfs: true
       - name: Pull LFS files
         run: |
           git lfs install
           git lfs pull
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
           python-version: '3.11'
+      - name: Setup system dependencies
+        run: chmod +x .github/workflows/scripts/setup_system.sh && .github/workflows/scripts/setup_system.sh
+      - name: Convert MD to PDF
+        run: chmod +x .github/workflows/scripts/convert_md_to_pdf.sh && .github/workflows/scripts/convert_md_to_pdf.sh
       - name: Upload PDF artifacts
         uses: actions/upload-artifact@v4
         with:
           path: "**/*.pdf"
           retention-days: 30
       - name: Upload to Google Drive
         env:
           GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
+        run: python .github/workflows/scripts/upload_to_drive.py
       - name: Push to Hugging Face hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}

.github/workflows/puppeteer-config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]
+}

.github/workflows/scripts/convert_md_to_pdf.sh ADDED Viewed

	@@ -0,0 +1,112 @@

+#!/bin/bash
+set -e
+SCRIPTS_DIR="$GITHUB_WORKSPACE/.github/workflows/scripts"
+echo "Converting MD files to PDF..."
+find . -name "*.md" -not -path "./.git/*" | while read file; do
+  dir="$(dirname "$file")"
+  filename="$(basename "$file" .md)"
+  pdf_path="$dir/$filename.pdf"
+  echo "Processing $file..."
+  if [ ! -f "$file" ]; then
+    echo "ERROR: File $file does not exist"
+    continue
+  fi
+  if [ ! -r "$file" ]; then
+    echo "ERROR: File $file is not readable"
+    continue
+  fi
+  echo "File size: $(wc -c < "$file") bytes"
+  # Preprocess the markdown file
+  cd "$dir"
+  processed_file=$(python3 "$SCRIPTS_DIR/preprocess_markdown.py" "$(basename "$file")" 2>&1) || {
+    echo "Preprocessing failed, using original file"
+    processed_file="$(basename "$file")"
+  }
+  if [ ! -f "$processed_file" ]; then
+    echo "Processed file $processed_file does not exist, using original"
+    processed_file="$(basename "$file")"
+  fi
+  echo "Using file for conversion: $processed_file"
+  # Method 1: Try XeLaTeX with enhanced settings
+  pandoc "$processed_file" \
+    -o "$pdf_path" \
+    --pdf-engine=xelatex \
+    --include-in-header="$SCRIPTS_DIR/latex-header.tex" \
+    --variable mainfont="DejaVu Sans" \
+    --variable sansfont="DejaVu Sans" \
+    --variable monofont="DejaVu Sans Mono" \
+    --variable geometry:top=0.5in,left=0.5in,right=0.5in,bottom=0.5in \
+    --variable colorlinks=true \
+    --variable linkcolor=blue \
+    --variable urlcolor=blue \
+    --variable toccolor=gray \
+    --resource-path="$dir:$SCRIPTS_DIR" \
+    --standalone \
+    --toc \
+    --number-sections \
+    --highlight-style=pygments \
+    --wrap=auto \
+    --dpi=300 \
+    --verbose 2>&1 || {
+    echo "XeLaTeX failed, trying HTML->PDF conversion..."
+    # Method 2: HTML to PDF conversion
+    pandoc "$processed_file" \
+      -t html5 \
+      --standalone \
+      --embed-resources \
+      --css="$SCRIPTS_DIR/styles.css" \
+      --toc \
+      --number-sections \
+      --highlight-style=pygments \
+      -o "$dir/$filename.html" 2>&1
+    if [ -f "$dir/$filename.html" ]; then
+      weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
+        wkhtmltopdf \
+          --page-size A4 \
+          --margin-top 0.5in \
+          --margin-right 0.5in \
+          --margin-bottom 0.5in \
+          --margin-left 0.5in \
+          --encoding UTF-8 \
+          --no-outline \
+          --enable-local-file-access \
+          "$dir/$filename.html" "$pdf_path" 2>&1 || {
+          echo "All conversion methods failed for $file"
+          continue
+        }
+      }
+      rm -f "$dir/$filename.html"
+    else
+      echo "Failed to create HTML file for $file"
+      continue
+    fi
+  }
+  # Clean up
+  if [ "$processed_file" != "$(basename "$file")" ]; then
+    rm -f "$processed_file"
+  fi
+  rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
+  if [ -f "$pdf_path" ]; then
+    echo "✅ Successfully converted $file to $pdf_path"
+    echo "PDF file size: $(wc -c < "$pdf_path") bytes"
+  else
+    echo "❌ Failed to convert $file"
+  fi
+  cd "$GITHUB_WORKSPACE"
+done

.github/workflows/scripts/latex-header.tex ADDED Viewed

	@@ -0,0 +1,42 @@

+% Preamble additions passed to pandoc through --include-in-header.
+\usepackage{graphicx}
+\usepackage{float}
+\usepackage{adjustbox}
+\usepackage{caption}
+\usepackage{subcaption}
+\usepackage{geometry}
+\usepackage{fancyhdr}
+\usepackage{xcolor}
+\usepackage{hyperref}
+\usepackage{fontspec}
+\usepackage{unicode-math}
+% Text fonts
+\setmainfont{DejaVu Sans}
+\setsansfont{DejaVu Sans}
+\setmonofont{DejaVu Sans Mono}
+% Optional emoji font. Declare it only when the font is installed: an
+% unconditional \newfontfamily stops the run with a fontspec
+% "font cannot be found" error on systems without Apple Color Emoji.
+\IfFontExistsTF{Apple Color Emoji}{%
+    \newfontfamily\emojifont{Apple Color Emoji}[Renderer=Harfbuzz]%
+}{%
+    % Keep \emojifont defined so documents that use it still compile;
+    % here it simply leaves the current text font in place.
+    \providecommand{\emojifont}{}%
+}
+% Better image positioning and scaling
+\floatplacement{figure}{H}
+% Save the original command first; the wrapper below calls it, so redefining
+% before saving would make \includegraphics call itself.
+\let\oldincludegraphics\includegraphics
+\renewcommand{\includegraphics}[2][]{%
+    \adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}%
+}
+% Page margins: 0.5in on every side
+\geometry{
+    top=0.5in,
+    left=0.5in,
+    right=0.5in,
+    bottom=0.5in
+}
+% Hyperlink colors
+\hypersetup{
+    colorlinks=true,
+    linkcolor=blue,
+    urlcolor=blue,
+    citecolor=blue
+}

.github/workflows/scripts/preprocess_markdown.py ADDED Viewed

	@@ -0,0 +1,165 @@

+#!/usr/bin/env python3
+import re
+import os
+import sys
+import subprocess
+from pathlib import Path
+def process_mermaid_diagrams(content, file_dir):
+    """Convert mermaid diagrams to images"""
+    mermaid_pattern = r'```mermaid\n(.*?)\n```'
+    def replace_mermaid(match):
+        mermaid_code = match.group(1)
+        # Create a unique filename for this diagram
+        diagram_hash = str(abs(hash(mermaid_code)))
+        mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
+        svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
+        png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
+        # Write mermaid code to file
+        try:
+            with open(mermaid_file, 'w', encoding='utf-8') as f:
+                f.write(mermaid_code)
+        except Exception as e:
+            print(f"Error writing mermaid file: {e}")
+            return f'\n```\n{mermaid_code}\n```\n'
+        try:
+            # Convert to SVG first - FIXED: Remove --puppeteerConfig
+            result = subprocess.run([
+                'mmdc', '-i', mermaid_file, '-o', svg_file,
+                '--theme', 'default', '--backgroundColor', 'white'
+            ], check=True, capture_output=True, text=True)
+            # Convert SVG to PNG for better PDF compatibility
+            subprocess.run([
+                'rsvg-convert', '-f', 'png', '-o', png_file,
+                '--width', '1200', '--height', '800', svg_file
+            ], check=True, capture_output=True, text=True)
+            # Clean up intermediate files
+            try:
+                os.remove(mermaid_file)
+                if os.path.exists(svg_file):
+                    os.remove(svg_file)
+            except:
+                pass
+            # Return markdown image syntax
+            return (
+                f'\n<div class="mermaid-container">\n\n'
+                f'![Architecture Diagram]({os.path.basename(png_file)})\n\n'
+                f'</div>\n'
+            )
+        except subprocess.CalledProcessError as e:
+            print(f"Error converting mermaid diagram: {e}")
+            print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
+            try:
+                os.remove(mermaid_file)
+            except:
+                pass
+            return f'\n```\n{mermaid_code}\n```\n'
+        except Exception as e:
+            print(f"Unexpected error with mermaid: {e}")
+            try:
+                os.remove(mermaid_file)
+            except:
+                pass
+            return f'\n```\n{mermaid_code}\n```\n'
+    return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
+def clean_emojis_and_fix_images(content, file_dir):
+    """Remove/replace emojis and fix image paths"""
+    emoji_replacements = {
+        '🎵': '[Audio]',
+        '🎬': '[Video]',
+        '📝': '[Document]',
+        '📊': '[Analytics]',
+        '🧠': '[AI]',
+        '🎥': '[Media]',
+        '📄': '[File]'
+    }
+    for emoji, replacement in emoji_replacements.items():
+        content = content.replace(emoji, replacement)
+    # Pattern to match markdown images
+    img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
+    def replace_image(match):
+        alt_text = match.group(1)
+        img_path = match.group(2)
+        if not img_path.startswith(('http://', 'https://', '/')):
+            abs_img_path = os.path.join(file_dir, img_path)
+            if os.path.exists(abs_img_path):
+                img_path = os.path.relpath(abs_img_path, file_dir)
+        return (
+            f'<img src="{img_path}" alt="{alt_text}" '
+            f'style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
+        )
+    content = re.sub(img_pattern, replace_image, content)
+    # Fix existing HTML img tags
+    content = re.sub(
+        r'<img\s+([^>]*?)\s*/?>',
+        lambda m: (
+            f'<img {m.group(1)} '
+            f'style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
+        ),
+        content
+    )
+    return content
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python preprocess_markdown.py <markdown_file>")
+        sys.exit(1)
+    md_file = sys.argv[1]
+    if not os.path.exists(md_file):
+        print(f"Error: File {md_file} does not exist")
+        sys.exit(1)
+    try:
+        file_dir = os.path.dirname(os.path.abspath(md_file))
+        with open(md_file, 'r', encoding='utf-8') as f:
+            content = f.read()
+        print(f"Processing file: {md_file}")
+        print(f"File directory: {file_dir}")
+        print(f"Content length: {len(content)} characters")
+        # Process mermaid diagrams
+        content = process_mermaid_diagrams(content, file_dir)
+        print(f"Mermaid processing complete. Content length: {len(content)}")
+        # Clean emojis and fix image paths
+        content = clean_emojis_and_fix_images(content, file_dir)
+        print(f"Image path fixing complete. Content length: {len(content)}")
+        # Write processed content
+        processed_file = md_file.replace('.md', '_processed.md')
+        with open(processed_file, 'w', encoding='utf-8') as f:
+            f.write(content)
+        print(f"Processed file saved as: {processed_file}")
+        print(processed_file)
+    except Exception as e:
+        print(f"Error processing {md_file}: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

.github/workflows/scripts/setup_system.sh ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/bin/bash
+set -e
+echo "Installing system dependencies..."
+sudo apt-get update
+sudo apt-get install -y \
+  texlive-full \
+  texlive-xetex \
+  texlive-luatex \
+  pandoc \
+  librsvg2-bin \
+  python3-pip \
+  nodejs \
+  npm \
+  imagemagick \
+  ghostscript \
+  wkhtmltopdf
+echo "Installing Node.js dependencies for Mermaid..."
+npm install -g @mermaid-js/mermaid-cli
+npm install -g puppeteer
+sudo apt-get install -y google-chrome-stable
+echo "Installing Python dependencies..."
+pip install --upgrade pip
+pip install \
+  weasyprint \
+  markdown \
+  pymdown-extensions \
+  pillow \
+  cairosvg \
+  pdfkit \
+  google-auth \
+  google-auth-oauthlib \
+  google-auth-httplib2 \
+  google-api-python-client
+echo "System setup complete!"

.github/workflows/scripts/styles.css ADDED Viewed

	@@ -0,0 +1,109 @@

+/* Stylesheet for the HTML rendering of the markdown documents. */
+/* Page body: centred column capped at 210mm (the width of an A4 sheet). */
+body {
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    max-width: 210mm;
+    margin: 0 auto;
+    padding: 20mm;
+    line-height: 1.6;
+    color: #333;
+    background: white;
+}
+/* Images: never wider than the column, centred, with a soft shadow. */
+img {
+    max-width: 100%;
+    height: auto;
+    display: block;
+    margin: 1em auto;
+    border-radius: 4px;
+    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+}
+/* Code blocks: light panel with a blue accent bar; scroll instead of wrapping. */
+pre {
+    background: #f8f9fa;
+    padding: 1em;
+    border-radius: 6px;
+    border-left: 4px solid #007acc;
+    overflow-x: auto;
+    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
+    font-size: 0.9em;
+}
+/* Inline code: subtle grey chip. */
+code {
+    background: #f1f3f4;
+    padding: 0.2em 0.4em;
+    border-radius: 3px;
+    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
+    font-size: 0.9em;
+}
+/* Headings: shared colour and spacing; avoid a page break right after one. */
+h1, h2, h3, h4, h5, h6 {
+    color: #2c3e50;
+    margin-top: 2em;
+    margin-bottom: 1em;
+    page-break-after: avoid;
+}
+/* Top-level headings get an underline rule. */
+h1 {
+    border-bottom: 3px solid #3498db;
+    padding-bottom: 0.5em;
+}
+/* Second-level headings get a thinner, grey underline. */
+h2 {
+    border-bottom: 2px solid #95a5a6;
+    padding-bottom: 0.3em;
+}
+/* Tables: full-width grid with collapsed borders. */
+table {
+    border-collapse: collapse;
+    width: 100%;
+    margin: 1em 0;
+}
+th, td {
+    border: 1px solid #ddd;
+    padding: 0.75em;
+    text-align: left;
+}
+th {
+    background-color: #f8f9fa;
+    font-weight: bold;
+}
+/* Block quotes: accent bar on the left, tinted background. */
+blockquote {
+    border-left: 4px solid #3498db;
+    margin: 1em 0;
+    padding: 0.5em 1em;
+    background: #f8f9fa;
+    border-radius: 0 4px 4px 0;
+}
+/* Wrapper for rendered Mermaid diagrams: centred and kept on one page. */
+.mermaid-container {
+    text-align: center;
+    margin: 2em 0;
+    page-break-inside: avoid;
+}
+.mermaid-container img {
+    max-width: 100%;
+    height: auto;
+}
+/* Print/PDF overrides: tighter padding and page-break control. */
+@media print {
+    body {
+        margin: 0;
+        padding: 15mm;
+    }
+    /* Cap image height so a single image cannot exceed a page. */
+    img {
+        max-height: 80vh;
+        page-break-inside: avoid;
+    }
+    h1, h2, h3, h4, h5, h6 {
+        page-break-after: avoid;
+    }
+    pre, blockquote {
+        page-break-inside: avoid;
+    }
+}

.github/workflows/scripts/upload_to_drive.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import os
+import json
+import mimetypes
+from google.oauth2.credentials import Credentials
+from google.auth.transport.requests import Request
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaFileUpload
+# Module-level setup: runs on import/execution and needs network access
+# whenever the stored token has to be refreshed.
+# Load OAuth credentials from environment; raises KeyError when the
+# GOOGLE_OAUTH_TOKEN variable is not set.
+oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
+# The variable holds the authorized-user token as a JSON document.
+token_info = json.loads(oauth_token_json)
+# Create credentials from the token info
+credentials = Credentials.from_authorized_user_info(token_info)
+# Refresh the access token only when it has expired and a refresh token is available
+if credentials.expired and credentials.refresh_token:
+    credentials.refresh(Request())
+# Build the Drive v3 service client used by every helper below
+service = build('drive', 'v3', credentials=credentials)
+# Target folder ID - This is where files will be uploaded
+FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
+def get_mime_type(file_path):
+    mime_type, _ = mimetypes.guess_type(file_path)
+    return mime_type or 'application/octet-stream'
+def upload_file(file_path, parent_folder_id, drive_service):
+    file_name = os.path.basename(file_path)
+    # Check if file already exists in the specific folder
+    query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
+    results = drive_service.files().list(q=query).execute()
+    items = results.get('files', [])
+    media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
+    if items:
+        # Update existing file
+        file_id = items[0]['id']
+        updated_file = drive_service.files().update(
+            fileId=file_id,
+            media_body=media
+        ).execute()
+        print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
+    else:
+        # Create new file
+        file_metadata = {
+            'name': file_name,
+            'parents': [parent_folder_id]
+        }
+        file = drive_service.files().create(
+            body=file_metadata,
+            media_body=media,
+            fields='id'
+        ).execute()
+        print(f'Uploaded: {file_name} (ID: {file.get("id")})')
+def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
+    """Create a folder if it doesn't exist and return its ID"""
+    query = (
+        f"name='{folder_name}' and '{parent_folder_id}' in parents and "
+        f"mimeType='application/vnd.google-apps.folder' and trashed=false"
+    )
+    results = drive_service.files().list(q=query).execute()
+    items = results.get('files', [])
+    if items:
+        return items[0]['id']
+    else:
+        folder_metadata = {
+            'name': folder_name,
+            'parents': [parent_folder_id],
+            'mimeType': 'application/vnd.google-apps.folder'
+        }
+        folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
+        print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
+        return folder.get('id')
+def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
+    if exclude_dirs is None:
+        exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
+    if exclude_files is None:
+        exclude_files = ['*.md']  # Skip markdown files
+    import fnmatch
+    for root, dirs, files in os.walk(local_path):
+        # Remove excluded directories
+        dirs[:] = [d for d in dirs if d not in exclude_dirs]
+        # Calculate relative path from the root
+        rel_path = os.path.relpath(root, local_path)
+        current_folder_id = parent_folder_id
+        # Create nested folders if needed
+        if rel_path != '.':
+            path_parts = rel_path.split(os.sep)
+            for part in path_parts:
+                current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
+        # Upload files in current directory
+        for file in files:
+            should_skip = False
+            for pattern in exclude_files:
+                if fnmatch.fnmatch(file, pattern):
+                    should_skip = True
+                    break
+            if should_skip:
+                print(f'Skipping {file} (excluded file type)')
+                continue
+            file_path = os.path.join(root, file)
+            try:
+                upload_file(file_path, current_folder_id, drive_service)
+            except Exception as e:
+                print(f'Error uploading {file_path}: {e}')
+# Test folder permissions first
+try:
+    test_query = f"'{FOLDER_ID}' in parents and trashed=false"
+    test_results = service.files().list(q=test_query, pageSize=1).execute()
+    print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
+except Exception as e:
+    print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
+    exit(1)
+# Upload all files to Google Drive (excluding MD files)
+print("Starting upload to Google Drive...")
+upload_directory('.', FOLDER_ID, service)
+print("Upload completed - MD files were skipped, PDFs were uploaded!")