update
This view is limited to 50 files because it contains too many changes.
- .gitattributes +14 -35
- .gitignore +39 -0
- CHANGELOG.md +118 -0
- CONTRIBUTING.md +196 -0
- LICENSE +33 -0
- README.md +116 -5
- app/astro.config.mjs +76 -0
- app/package-lock.json +0 -0
- app/plugins/rehype/code-copy.mjs +94 -0
- app/plugins/rehype/post-citation.mjs +441 -0
- app/plugins/rehype/restore-at-in-code.mjs +22 -0
- app/plugins/rehype/wrap-outputs.mjs +38 -0
- app/plugins/rehype/wrap-tables.mjs +43 -0
- app/plugins/remark/ignore-citations-in-code.mjs +21 -0
- app/plugins/remark/output-container.mjs +23 -0
- app/plugins/remark/outputs-container.mjs +23 -0
- app/postcss.config.mjs +14 -0
- app/public/data +1 -0
- app/public/scripts/color-palettes.js +274 -0
- app/scripts/export-latex.mjs +318 -0
- app/scripts/export-pdf.mjs +483 -0
- app/scripts/generate-trackio-data.mjs +196 -0
- app/scripts/jitter-trackio-data.mjs +129 -0
- app/scripts/latex-importer/README.md +169 -0
- app/scripts/latex-importer/bib-cleaner.mjs +104 -0
- app/scripts/latex-importer/filters/equation-ids.lua +134 -0
- app/scripts/latex-importer/index.mjs +138 -0
- app/scripts/latex-importer/latex-converter.mjs +330 -0
- app/scripts/latex-importer/mdx-converter.mjs +896 -0
- app/scripts/latex-importer/metadata-extractor.mjs +170 -0
- app/scripts/latex-importer/package-lock.json +0 -0
- app/scripts/latex-importer/package.json +0 -0
- app/scripts/latex-importer/post-processor.mjs +439 -0
- app/scripts/latex-importer/reference-preprocessor.mjs +239 -0
- app/scripts/notion-importer/.cursorignore +1 -0
- app/scripts/notion-importer/.notion-to-md/media/27877f1c-9c9d-804d-9c82-f7b3905578ff_media.json +3 -0
- app/scripts/notion-importer/custom-code-renderer.mjs +33 -0
- app/scripts/notion-importer/debug-properties.mjs +87 -0
- app/scripts/notion-importer/input/pages.json +3 -0
- app/scripts/notion-importer/mdx-converter.mjs +551 -0
- app/scripts/notion-importer/notion-converter.mjs +259 -0
- app/scripts/notion-importer/notion-metadata-extractor.mjs +303 -0
- app/scripts/notion-importer/output/.temp-pages.json +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8013-b668-f14bd1ac0ec0.png +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8014-834f-d700b623256b.png +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-801d-841a-e35011491566.png +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8031-ac8d-c5678af1bdd5.png +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8048-9b7e-db4fa7485915.png +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-804d-bd0a-e0b1c15e504f.png +0 -0
- app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8075-ae2e-dc24fe9296ca.png +0 -0
.gitattributes
CHANGED
@@ -1,35 +1,14 @@
-… (removed lines 1–14, LFS patterns, are truncated in this view)
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.mov filter=lfs diff=lfs merge=lfs -text
+*.avi filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
+# the package and package lock should not be tracked
+package.json -filter -diff -merge text
+package-lock.json -filter -diff -merge text
.gitignore
ADDED
@@ -0,0 +1,39 @@
+# Python
+__pycache__
+*.py[cod]
+*.so
+.Python
+env/
+venv/
+*.egg-info/
+dist/
+build/
+*.egg
+.idea/
+.vscode/
+.astro/
+.claude/
+*.swp
+.DS_Store
+# Node
+node_modules/
+*.log
+*.env
+*.cache
+
+app/scripts/latex-to-mdx/output/
+app/src/content/embeds/typography/generated
+
+# PDF export
+app/public/*.pdf
+app/public/*.png
+app/public/*.jpg
+app/public/data/**/*
+
+.astro/
+
+# Template sync temporary directories
+.template-sync/
+.temp-*/
+.backup-*/
+
CHANGELOG.md
ADDED
@@ -0,0 +1,118 @@
+# Changelog
+
+All notable changes to the Research Article Template will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+- Initial open source release
+- Comprehensive documentation
+- Contributing guidelines
+- License file
+
+## [1.0.0] - 2024-12-19
+
+### Added
+- **Core Features**:
+  - Markdown/MDX-based writing system
+  - KaTeX mathematical notation support
+  - Syntax highlighting for code blocks
+  - Academic citations with BibTeX integration
+  - Footnotes and sidenotes system
+  - Auto-generated table of contents
+  - Interactive Mermaid diagrams
+  - Plotly.js and D3.js integration
+  - HTML embed support
+  - Gradio app embedding
+  - Dataviz color palettes
+  - Image optimization
+  - SEO-friendly structure
+  - Automatic PDF export
+  - Dark/light theme toggle
+  - Mobile-responsive design
+  - LaTeX import functionality
+  - Template synchronization system
+
+- **Components**:
+  - Figure component with captions
+  - MultiFigure for image galleries
+  - Note component with variants
+  - Quote component
+  - Accordion for collapsible content
+  - Sidenote component
+  - Table of Contents
+  - Theme Toggle
+  - HTML Embed
+  - Raw HTML support
+  - SEO component
+  - Hero section
+  - Footer
+  - Full-width and wide layouts
+
+- **Build System**:
+  - Astro 4.10.0 integration
+  - PostCSS with custom media queries
+  - Automatic compression
+  - Docker support
+  - Nginx configuration
+  - Git LFS support
+
+- **Scripts**:
+  - PDF export functionality
+  - LaTeX to MDX conversion
+  - Template synchronization
+  - Font SVG generation
+  - TrackIO data generation
+
+- **Documentation**:
+  - Getting started guide
+  - Writing best practices
+  - Component reference
+  - LaTeX conversion guide
+  - Interactive examples
+
+### Technical Details
+- **Framework**: Astro 4.10.0
+- **Styling**: PostCSS with custom properties
+- **Math**: KaTeX 0.16.22
+- **Charts**: Plotly.js 3.1.0, D3.js 7.9.0
+- **Diagrams**: Mermaid 11.10.1
+- **Node.js**: >=20.0.0
+- **License**: CC-BY-4.0
+
+### Browser Support
+- Chrome (latest)
+- Firefox (latest)
+- Safari (latest)
+- Edge (latest)
+
+---
+
+## Version History
+
+- **1.0.0**: Initial stable release with full feature set
+- **0.0.1**: Development version (pre-release)
+
+## Migration Guide
+
+### From 0.0.1 to 1.0.0
+
+This is the first stable release. No breaking changes from the development version.
+
+### Updating Your Project
+
+Use the template synchronization system to update:
+
+```bash
+npm run sync:template -- --dry-run  # Preview changes
+npm run sync:template               # Apply updates
+```
+
+## Support
+
+- **Documentation**: [Hugging Face Space](https://huggingface.co/spaces/tfrere/research-article-template)
+- **Issues**: [Community Discussions](https://huggingface.co/spaces/tfrere/research-article-template/discussions)
+- **Contact**: [@tfrere](https://huggingface.co/tfrere)
CONTRIBUTING.md
ADDED
@@ -0,0 +1,196 @@
+# Contributing to Research Article Template
+
+Thank you for your interest in contributing to the Research Article Template! This document provides guidelines and information for contributors.
+
+## 🤝 How to Contribute
+
+### Reporting Issues
+
+Before creating an issue, please:
+1. **Search existing issues** to avoid duplicates
+2. **Use the issue template** when available
+3. **Provide detailed information**:
+   - Clear description of the problem
+   - Steps to reproduce
+   - Expected vs actual behavior
+   - Environment details (OS, Node.js version, browser)
+   - Screenshots if applicable
+
+### Suggesting Features
+
+We welcome feature suggestions! Please:
+1. **Check existing discussions** first
+2. **Describe the use case** clearly
+3. **Explain the benefits** for the community
+4. **Consider implementation complexity**
+
+### Code Contributions
+
+#### Getting Started
+
+1. **Fork the repository** on Hugging Face
+2. **Clone your fork**:
+   ```bash
+   git clone git@hf.co:spaces/<your-username>/research-article-template
+   cd research-article-template
+   ```
+3. **Install dependencies**:
+   ```bash
+   cd app
+   npm install
+   ```
+4. **Create a feature branch**:
+   ```bash
+   git checkout -b feature/your-feature-name
+   ```
+
+#### Development Workflow
+
+1. **Make your changes** following our coding standards
+2. **Test thoroughly**:
+   ```bash
+   npm run dev    # Test locally
+   npm run build  # Ensure build works
+   ```
+3. **Update documentation** if needed
+4. **Commit with clear messages**:
+   ```bash
+   git commit -m "feat: add new component for interactive charts"
+   ```
+
+#### Pull Request Process
+
+1. **Push your branch**:
+   ```bash
+   git push origin feature/your-feature-name
+   ```
+2. **Create a Pull Request** with:
+   - Clear title and description
+   - Reference related issues
+   - Screenshots for UI changes
+   - Testing instructions
+
+## 📋 Coding Standards
+
+### Code Style
+
+- **Use Prettier** for consistent formatting
+- **Follow existing patterns** in the codebase
+- **Write clear, self-documenting code**
+- **Add comments** for complex logic
+- **Use meaningful variable names**
+
+### File Organization
+
+- **Components**: Place in `src/components/`
+- **Styles**: Use CSS modules or component-scoped styles
+- **Assets**: Organize in `src/content/assets/`
+- **Documentation**: Update relevant `.mdx` files
+
+### Commit Message Format
+
+We follow [Conventional Commits](https://www.conventionalcommits.org/):
+
+```
+type(scope): description
+
+feat: add new interactive chart component
+fix: resolve mobile layout issues
+docs: update installation instructions
+style: improve button hover states
+refactor: simplify component structure
+test: add unit tests for utility functions
+```
+
+**Types**: `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore`
+
+## 🧪 Testing
+
+### Manual Testing
+
+Before submitting:
+- [ ] Test on different screen sizes
+- [ ] Verify dark/light theme compatibility
+- [ ] Check browser compatibility (Chrome, Firefox, Safari)
+- [ ] Test with different content types
+- [ ] Ensure accessibility standards
+
+### Automated Testing
+
+```bash
+# Run build to catch errors
+npm run build
+
+# Test PDF export
+npm run export:pdf
+
+# Test LaTeX conversion
+npm run latex:convert
+```
+
+## 📚 Documentation
+
+### Writing Guidelines
+
+- **Use clear, concise language**
+- **Provide examples** for complex features
+- **Include screenshots** for UI changes
+- **Update both English content and code comments**
+
+### Documentation Structure
+
+- **README.md**: Project overview and quick start
+- **CONTRIBUTING.md**: This file
+- **Content files**: In `src/content/chapters/demo/`
+- **Component docs**: Inline comments and examples
+
+## 🎯 Areas for Contribution
+
+### High Priority
+
+- **Bug fixes** and stability improvements
+- **Accessibility enhancements**
+- **Mobile responsiveness**
+- **Performance optimizations**
+- **Documentation improvements**
+
+### Feature Ideas
+
+- **New interactive components**
+- **Additional export formats**
+- **Enhanced LaTeX import**
+- **Theme customization**
+- **Plugin system**
+
+### Community
+
+- **Answer questions** in discussions
+- **Share examples** of your work
+- **Write tutorials** and guides
+- **Help with translations**
+
+## 🚫 What Not to Contribute
+
+- **Breaking changes** without discussion
+- **Major architectural changes** without approval
+- **Dependencies** that significantly increase bundle size
+- **Features** that don't align with the project's goals
+
+## 📞 Getting Help
+
+- **Discussions**: [Community tab](https://huggingface.co/spaces/tfrere/research-article-template/discussions)
+- **Issues**: [Report bugs](https://huggingface.co/spaces/tfrere/research-article-template/discussions?status=open&type=issue)
+- **Contact**: [@tfrere](https://huggingface.co/tfrere) on Hugging Face
+
+## 📄 License
+
+By contributing, you agree that your contributions will be licensed under the same [CC-BY-4.0 license](LICENSE) that covers the project.
+
+## 🙏 Recognition
+
+Contributors will be:
+- **Listed in acknowledgments** (if desired)
+- **Mentioned in release notes** for significant contributions
+- **Credited** in relevant documentation
+
+Thank you for helping make scientific writing more accessible and interactive! 🎉
LICENSE
ADDED
@@ -0,0 +1,33 @@
+Creative Commons Attribution 4.0 International License
+
+Copyright (c) 2024 Thibaud Frere
+
+This work is licensed under the Creative Commons Attribution 4.0 International License.
+To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/
+or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
+
+You are free to:
+
+Share — copy and redistribute the material in any medium or format
+Adapt — remix, transform, and build upon the material for any purpose, even commercially.
+
+The licensor cannot revoke these freedoms as long as you follow the license terms.
+
+Under the following terms:
+
+Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
+
+No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
+
+Notices:
+
+You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation.
+
+No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material.
+
+---
+
+For the source code and technical implementation:
+- The source code is available at: https://huggingface.co/spaces/tfrere/research-article-template
+- Third-party figures and assets are excluded from this license and marked in their captions
+- Dependencies and third-party libraries maintain their respective licenses
README.md
CHANGED
@@ -1,10 +1,121 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: 'Bringing paper to life: A modern template for scientific writing'
+emoji: 📝
+colorFrom: blue
+colorTo: indigo
 sdk: docker
 pinned: false
+header: mini
+app_port: 8080
+tags:
+  - research-article-template
+  - research paper
+  - scientific paper
+  - data visualization
+thumbnail: https://huggingface.co/spaces/tfrere/research-paper-template/thumb.jpg
 ---
+<div align="center">
 
-
+# Research Article Template
+
+[](https://creativecommons.org/licenses/by/4.0/)
+[](https://nodejs.org/)
+[](https://astro.build/)
+[](https://huggingface.co/spaces/tfrere/research-article-template)
+
+
+**A modern, interactive template for scientific writing** that brings papers to life with web-native features. The web offers what static PDFs can't: **interactive diagrams**, **progressive notation**, and **exploratory views** that show how ideas behave. This template treats interactive artifacts—figures, math, code, and inspectable experiments—as **first-class** alongside prose, helping readers **build intuition** instead of skimming results—all with **minimal setup** and no web knowledge required.
+
+**[Try the live demo & documentation →](https://huggingface.co/spaces/tfrere/research-article-template)**
+
+</div>
+
+## 🚀 Quick Start
+
+### Option 1: Duplicate on Hugging Face (Recommended)
+
+1. Visit **[🤗 Research Article Template](https://huggingface.co/spaces/tfrere/research-article-template)**
+2. Click **"Duplicate this Space"**
+3. Clone your new repository:
+   ```bash
+   git clone git@hf.co:spaces/<your-username>/<your-space>
+   cd <your-space>
+   ```
+
+### Option 2: Clone Directly
+
+```bash
+git clone https://github.com/tfrere/research-article-template.git
+cd research-article-template
+```
+
+### Installation
+
+```bash
+# Install Node.js 20+ (use nvm for version management)
+nvm install 20
+nvm use 20
+
+# Install Git LFS and pull assets
+git lfs install
+git lfs pull
+
+# Install dependencies
+cd app
+npm install
+
+# Start development server
+npm run dev
+```
+
+Visit `http://localhost:4321` to see your site!
+
+## 🎯 Who This Is For
+
+- **Scientists** writing modern, web-native research papers
+- **Educators** creating interactive, explorable lessons
+- **Researchers** who want to focus on ideas, not infrastructure
+- **Anyone** who values clear, engaging technical communication
+
+## 🌟 Inspired by Distill
+
+This template carries forward the spirit of [Distill](https://distill.pub/) (2016–2021), pushing interactive scientific writing even further with:
+- Accessible, high-quality explanations
+- Reproducible, production-ready demos
+- Modern web technologies and best practices
+
+## 🤝 Contributing
+
+We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
+
+### Ways to Contribute
+
+- **Report bugs** - Open an issue with detailed information
+- **Suggest features** - Share ideas for improvements
+- **Improve documentation** - Help others get started
+- **Submit code** - Fix bugs or add features
+- **Join discussions** - Share feedback and ideas
+
+## 📄 License
+
+This project is licensed under the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).
+
+- **Diagrams and text**: CC-BY 4.0
+- **Source code**: Available on [Hugging Face](https://huggingface.co/spaces/tfrere/research-article-template)
+- **Third-party figures**: Excluded and marked in captions
+
+## 🙏 Acknowledgments
+
+- Inspired by [Distill](https://distill.pub/) and the interactive scientific writing movement
+- Built with [Astro](https://astro.build/), [MDX](https://mdxjs.com/), and modern web technologies
+- Community feedback and contributions from researchers worldwide
+
+## 📞 Support
+
+- **[Community Discussions](https://huggingface.co/spaces/tfrere/research-article-template/discussions)** - Ask questions and share ideas
+- **[Report Issues](https://huggingface.co/spaces/tfrere/research-article-template/discussions?status=open&type=issue)** - Bug reports and feature requests
+- **Contact**: [@tfrere](https://huggingface.co/tfrere) on Hugging Face
+
+---
+
+**Made with ❤️ for the scientific community**
app/astro.config.mjs
ADDED
@@ -0,0 +1,76 @@
+import { defineConfig } from 'astro/config';
+import mdx from '@astrojs/mdx';
+import svelte from '@astrojs/svelte';
+import mermaid from 'astro-mermaid';
+import compressor from 'astro-compressor';
+import remarkMath from 'remark-math';
+import rehypeKatex from 'rehype-katex';
+import remarkFootnotes from 'remark-footnotes';
+import rehypeSlug from 'rehype-slug';
+import rehypeAutolinkHeadings from 'rehype-autolink-headings';
+import rehypeCitation from 'rehype-citation';
+import rehypeCodeCopy from './plugins/rehype/code-copy.mjs';
+import rehypeReferencesAndFootnotes from './plugins/rehype/post-citation.mjs';
+import remarkIgnoreCitationsInCode from './plugins/remark/ignore-citations-in-code.mjs';
+import remarkDirective from 'remark-directive';
+import remarkOutputContainer from './plugins/remark/output-container.mjs';
+import rehypeRestoreAtInCode from './plugins/rehype/restore-at-in-code.mjs';
+import rehypeWrapTables from './plugins/rehype/wrap-tables.mjs';
+import rehypeWrapOutput from './plugins/rehype/wrap-outputs.mjs';
+// Built-in Shiki (dual themes) — no rehype-pretty-code
+
+// Plugins moved to app/plugins/*
+
+export default defineConfig({
+  output: 'static',
+  integrations: [
+    mermaid({ theme: 'forest', autoTheme: true }),
+    mdx(),
+    svelte(),
+    // Precompress output with Gzip only (Brotli disabled due to server module mismatch)
+    compressor({ brotli: false, gzip: true })
+  ],
+  devToolbar: {
+    enabled: false
+  },
+  markdown: {
+    shikiConfig: {
+      themes: {
+        light: 'github-light',
+        dark: 'github-dark'
+      },
+      defaultColor: false,
+      wrap: false,
+      langAlias: {
+        // Map MDX fences to TSX for better JSX tokenization
+        mdx: 'tsx'
+      }
+    },
+    remarkPlugins: [
+      remarkIgnoreCitationsInCode,
+      remarkMath,
+      [remarkFootnotes, { inlineNotes: true }],
+      remarkDirective,
+      remarkOutputContainer
+    ],
+    rehypePlugins: [
+      rehypeSlug,
+      [rehypeAutolinkHeadings, { behavior: 'wrap' }],
+      [rehypeKatex, {
+        trust: true,
+      }],
+      [rehypeCitation, {
+        bibliography: 'src/content/bibliography.bib',
+        linkCitations: true,
+        csl: "apa",
+      }],
+      rehypeReferencesAndFootnotes,
+      rehypeRestoreAtInCode,
+      rehypeCodeCopy,
+      rehypeWrapOutput,
+      rehypeWrapTables
+    ]
+  }
+});
+
+
app/package-lock.json
ADDED
Binary file (450 kB)
app/plugins/rehype/code-copy.mjs
ADDED
@@ -0,0 +1,94 @@
+// Minimal rehype plugin to wrap code blocks with a copy button
+// Exported as a standalone module to keep astro.config.mjs lean
+export default function rehypeCodeCopy() {
+  return (tree) => {
+    // Walk the tree; lightweight visitor to find <pre><code>
+    const visit = (node, parent) => {
+      if (!node || typeof node !== 'object') return;
+      const children = Array.isArray(node.children) ? node.children : [];
+      if (node.tagName === 'pre' && children.some(c => c.tagName === 'code')) {
+        // Find code child
+        const code = children.find(c => c.tagName === 'code');
+        // Determine if single-line block: prefer Shiki lines, then text content
+        const countLinesFromShiki = () => {
+          const isLineEl = (el) => el && el.type === 'element' && el.tagName === 'span' && Array.isArray(el.properties?.className) && el.properties.className.includes('line');
+          const hasNonWhitespaceText = (node) => {
+            if (!node) return false;
+            if (node.type === 'text') return /\S/.test(String(node.value || ''));
+            const kids = Array.isArray(node.children) ? node.children : [];
+            return kids.some(hasNonWhitespaceText);
+          };
+          const collectLines = (node, acc) => {
+            if (!node || typeof node !== 'object') return;
+            if (isLineEl(node)) acc.push(node);
+            const kids = Array.isArray(node.children) ? node.children : [];
+            kids.forEach((k) => collectLines(k, acc));
+          };
+          const lines = [];
+          collectLines(code, lines);
+          const nonEmpty = lines.filter((ln) => hasNonWhitespaceText(ln)).length;
+          return nonEmpty || 0;
+        };
+        const countLinesFromText = () => {
+          // Parse raw text content of the <code> node including nested spans
+          const extractText = (node) => {
+            if (!node) return '';
+            if (node.type === 'text') return String(node.value || '');
+            const kids = Array.isArray(node.children) ? node.children : [];
+            return kids.map(extractText).join('');
+          };
+          const raw = extractText(code);
+          if (!raw || !/\S/.test(raw)) return 0;
+          return raw.split('\n').filter(line => /\S/.test(line)).length;
+        };
+        const lines = countLinesFromShiki() || countLinesFromText();
+        const isSingleLine = lines <= 1;
+        // Also treat code blocks shorter than a threshold as single-line (defensive)
+        if (!isSingleLine) {
+          const approxChars = (() => {
+            const extract = (n) => Array.isArray(n?.children) ? n.children.map(extract).join('') : (n?.type === 'text' ? String(n.value||'') : '');
+            return extract(code).length;
+          })();
+          if (approxChars < 6) {
+            node.__forceSingle = true;
+          }
+        }
+        // Replace <pre> with wrapper div.code-card containing button + pre
+        const wrapper = {
+          type: 'element',
+          tagName: 'div',
+          properties: { className: ['code-card'].concat((isSingleLine || node.__forceSingle) ? ['no-copy'] : []) },
+          children: (isSingleLine || node.__forceSingle) ? [ node ] : [
+            {
+              type: 'element',
+              tagName: 'button',
+              properties: { className: ['code-copy', 'button--ghost'], type: 'button', 'aria-label': 'Copy code' },
+              children: [
+                {
+                  type: 'element',
+                  tagName: 'svg',
+                  properties: { viewBox: '0 0 24 24', 'aria-hidden': 'true', focusable: 'false' },
+                  children: [
+                    { type: 'element', tagName: 'path', properties: { d: 'M16 1H4c-1.1 0-2 .9-2 2v12h2V3h12V1zm3 4H8c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h11c1.1 0 2-.9 2-2V7c0-1.1-.9-2-2-2zm0 16H8V7h11v14z' }, children: [] }
+                  ]
+                }
+              ]
+            },
+            node
+          ]
+        };
+        if (parent && Array.isArray(parent.children)) {
+          const idx = parent.children.indexOf(node);
+          if (idx !== -1) parent.children[idx] = wrapper;
+        }
+        return; // don't visit nested
+      }
+      children.forEach((c) => visit(c, node));
+    };
+    visit(tree, null);
+  };
+}
+
+
+
+
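The plugin above only injects the markup; the actual copy-to-clipboard behavior has to come from a small client-side script that is not part of this diff. Below is a minimal sketch of such a handler, assuming only the `.code-card` / `.code-copy` class names and the `<pre><code>` structure produced by `rehypeCodeCopy`; the script itself and the `copied` class are hypothetical.

```js
// Hypothetical client-side companion for the injected .code-copy buttons.
// Assumes the DOM produced by rehypeCodeCopy above:
// <div class="code-card"><button class="code-copy">…</button><pre><code>…</code></pre></div>
document.addEventListener('click', async (event) => {
  const button = event.target.closest('.code-copy');
  if (!button) return;
  const card = button.closest('.code-card');
  const code = card ? card.querySelector('pre code') : null;
  if (!code) return;
  try {
    // navigator.clipboard requires a secure context (https or localhost)
    await navigator.clipboard.writeText(code.textContent || '');
    button.classList.add('copied');                 // hypothetical "copied" feedback class
    setTimeout(() => button.classList.remove('copied'), 1500);
  } catch {
    // Clipboard API unavailable: fail quietly rather than break the page
  }
});
```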
app/plugins/rehype/post-citation.mjs
ADDED
@@ -0,0 +1,441 @@
+// rehype plugin to post-process citations and footnotes at build-time
+// - Normalizes the bibliography into <ol class="references"> with <li id="...">
+// - Linkifies DOI/URL occurrences inside references
+// - Appends back-reference links (↩ back: 1, 2, ...) from each reference to in-text citation anchors
+// - Cleans up footnotes block (.footnotes)
+
+export default function rehypeReferencesAndFootnotes() {
+  return (tree) => {
+    const isElement = (n) => n && typeof n === 'object' && n.type === 'element';
+    const getChildren = (n) => (Array.isArray(n?.children) ? n.children : []);
+
+    const walk = (node, parent, fn) => {
+      if (!node || typeof node !== 'object') return;
+      fn && fn(node, parent);
+      const kids = getChildren(node);
+      for (const child of kids) walk(child, node, fn);
+    };
+
+    const ensureArray = (v) => (Array.isArray(v) ? v : v != null ? [v] : []);
+
+    const hasClass = (el, name) => {
+      const cn = ensureArray(el?.properties?.className).map(String);
+      return cn.includes(name);
+    };
+
+    const setAttr = (el, key, val) => {
+      el.properties = el.properties || {};
+      if (val == null) delete el.properties[key];
+      else el.properties[key] = val;
+    };
+
+    const getAttr = (el, key) => (el?.properties ? el.properties[key] : undefined);
+
+    // Shared helpers for backlinks + backrefs block
+    const collectBacklinksForIdSet = (idSet, anchorPrefix) => {
+      const idToBacklinks = new Map();
+      const idToAnchorNodes = new Map();
+      if (!idSet || idSet.size === 0) return { idToBacklinks, idToAnchorNodes };
+      walk(tree, null, (node) => {
+        if (!isElement(node) || node.tagName !== 'a') return;
+        const href = String(getAttr(node, 'href') || '');
+        if (!href.startsWith('#')) return;
+        const id = href.slice(1);
+        if (!idSet.has(id)) return;
+        // Ensure a stable id
+        let anchorId = String(getAttr(node, 'id') || '');
+        if (!anchorId) {
+          const list = idToBacklinks.get(id) || [];
+          anchorId = `${anchorPrefix}-${id}-${list.length + 1}`;
+          setAttr(node, 'id', anchorId);
+        }
+        const list = idToBacklinks.get(id) || [];
+        list.push(anchorId);
+        idToBacklinks.set(id, list);
+        const nodes = idToAnchorNodes.get(id) || [];
+        nodes.push(node);
+        idToAnchorNodes.set(id, nodes);
+      });
+      return { idToBacklinks, idToAnchorNodes };
+    };
+
+    const createBackIcon = () => ({
+      type: 'element',
+      tagName: 'svg',
+      properties: {
+        className: ['back-icon'],
+        width: 12,
+        height: 12,
+        viewBox: '0 0 24 24',
+        fill: 'none',
+        stroke: 'currentColor',
+        'stroke-width': 2,
+        'stroke-linecap': 'round',
+        'stroke-linejoin': 'round',
+        'aria-hidden': 'true',
+        focusable: 'false'
+      },
+      children: [
+        { type: 'element', tagName: 'line', properties: { x1: 12, y1: 19, x2: 12, y2: 5 }, children: [] },
+        { type: 'element', tagName: 'polyline', properties: { points: '5 12 12 5 19 12' }, children: [] }
+      ]
+    });
+
+    const appendBackrefsBlock = (listElement, idToBacklinks, ariaLabel) => {
+      if (!listElement || !idToBacklinks || idToBacklinks.size === 0) return;
+      for (const li of getChildren(listElement)) {
+        if (!isElement(li) || li.tagName !== 'li') continue;
+        const id = String(getAttr(li, 'id') || '');
+        if (!id) continue;
+        const keys = idToBacklinks.get(id);
+        if (!keys || !keys.length) continue;
+        // Remove pre-existing .backrefs in this li to avoid duplicates
+        li.children = getChildren(li).filter((n) => !(isElement(n) && n.tagName === 'small' && hasClass(n, 'backrefs')));
+        const small = {
+          type: 'element',
+          tagName: 'small',
+          properties: { className: ['backrefs'] },
+          children: []
+        };
+        if (keys.length === 1) {
+          // Single backlink: just the icon wrapped in the anchor
+          const a = {
+            type: 'element',
+            tagName: 'a',
+            properties: { href: `#${keys[0]}`, 'aria-label': ariaLabel },
+            children: [ createBackIcon() ]
+          };
+          small.children.push(a);
+        } else {
+          // Multiple backlinks: icon + label + numbered links
+          small.children.push(createBackIcon());
+          small.children.push({ type: 'text', value: ' back: ' });
+          keys.forEach((backId, idx) => {
+            small.children.push({
+              type: 'element',
+              tagName: 'a',
+              properties: { href: `#${backId}`, 'aria-label': ariaLabel },
+              children: [ { type: 'text', value: String(idx + 1) } ]
+            });
+            if (idx < keys.length - 1) small.children.push({ type: 'text', value: ', ' });
+          });
+        }
+        li.children.push(small);
+      }
+    };
+    // Remove default back-reference anchors generated by remark-footnotes inside a footnote item
+    const getTextContent = (el) => {
+      if (!el) return '';
+      const stack = [el];
+      let out = '';
+      while (stack.length) {
+        const cur = stack.pop();
+        if (!cur) continue;
+        if (cur.type === 'text') out += String(cur.value || '');
+        const kids = getChildren(cur);
+        for (let i = kids.length - 1; i >= 0; i--) stack.push(kids[i]);
+      }
+      return out;
+    };
+
+    const removeFootnoteBackrefAnchors = (el) => {
+      if (!isElement(el)) return;
+      const kids = getChildren(el);
+      for (let i = kids.length - 1; i >= 0; i--) {
+        const child = kids[i];
+        if (isElement(child)) {
+          if (
+            child.tagName === 'a' && (
+              getAttr(child, 'data-footnote-backref') != null ||
+              hasClass(child, 'footnote-backref') ||
+              String(getAttr(child, 'role') || '').toLowerCase() === 'doc-backlink' ||
+              String(getAttr(child, 'aria-label') || '').toLowerCase().includes('back to content') ||
+              String(getAttr(child, 'href') || '').startsWith('#fnref') ||
+              // Fallback: text-based detection like "↩" or "↩2"
+              /^\s*↩\s*\d*\s*$/u.test(getTextContent(child))
+            )
+          ) {
+            // Remove the anchor
+            el.children.splice(i, 1);
+            continue;
+          }
+          // Recurse into element
+          removeFootnoteBackrefAnchors(child);
+          // If a wrapper like <sup> or <span> became empty, remove it
+          const becameKids = getChildren(child);
+          if ((child.tagName === 'sup' || child.tagName === 'span') && (!becameKids || becameKids.length === 0)) {
+            el.children.splice(i, 1);
+          }
+        }
+      }
+    };
+
+
+    const normDoiHref = (href) => {
+      if (!href) return href;
+      const DUP = /https?:\/\/(?:dx\.)?doi\.org\/(?:https?:\/\/(?:dx\.)?doi\.org\/)+/gi;
+      const ONE = /https?:\/\/(?:dx\.)?doi\.org\/(10\.[^\s<>"']+)/i;
+      href = String(href).replace(DUP, 'https://doi.org/');
+      const m = href.match(ONE);
+      return m ? `https://doi.org/${m[1]}` : href;
+    };
+
+    const DOI_BARE = /\b10\.[0-9]{4,9}\/[\-._;()\/:A-Z0-9]+\b/gi;
+    const URL_GEN = /\bhttps?:\/\/[^\s<>()"']+/gi;
+
+    const linkifyTextNode = (textNode) => {
+      const text = String(textNode.value || '');
+      let last = 0;
+      const parts = [];
+      const pushText = (s) => { if (s) parts.push({ type: 'text', value: s }); };
+
+      const matches = [];
+      // Collect URL matches
+      let m;
+      URL_GEN.lastIndex = 0;
+      while ((m = URL_GEN.exec(text)) !== null) {
+        matches.push({ type: 'url', start: m.index, end: URL_GEN.lastIndex, raw: m[0] });
+      }
+      // Collect DOI matches
+      DOI_BARE.lastIndex = 0;
+      while ((m = DOI_BARE.exec(text)) !== null) {
+        matches.push({ type: 'doi', start: m.index, end: DOI_BARE.lastIndex, raw: m[0] });
+      }
+      matches.sort((a, b) => a.start - b.start);
+
+      for (const match of matches) {
+        if (match.start < last) continue; // overlapping
+        pushText(text.slice(last, match.start));
+        if (match.type === 'url') {
+          const href = normDoiHref(match.raw);
+          const doiOne = href.match(/https?:\/\/(?:dx\.)?doi\.org\/(10\.[^\s<>"']+)/i);
+          const a = {
+            type: 'element',
+            tagName: 'a',
+            properties: { href, target: '_blank', rel: 'noopener noreferrer' },
+            children: [{ type: 'text', value: doiOne ? doiOne[1] : href }]
+          };
+          parts.push(a);
+        } else {
+          const href = `https://doi.org/${match.raw}`;
+          const a = {
+            type: 'element',
+            tagName: 'a',
+            properties: { href, target: '_blank', rel: 'noopener noreferrer' },
+            children: [{ type: 'text', value: match.raw }]
+          };
+          parts.push(a);
+        }
+        last = match.end;
+      }
+
+      pushText(text.slice(last));
+      return parts;
+    };
+
+    const linkifyInElement = (el) => {
+      const kids = getChildren(el);
+      for (let i = 0; i < kids.length; i++) {
+        const child = kids[i];
+        if (!child) continue;
+        if (child.type === 'text') {
+          const replacement = linkifyTextNode(child);
+          if (replacement.length === 1 && replacement[0].type === 'text') continue;
+          // Replace the single text node with multiple nodes
+          el.children.splice(i, 1, ...replacement);
+          i += replacement.length - 1;
+        } else if (isElement(child)) {
+          if (child.tagName === 'a') {
+            const href = normDoiHref(getAttr(child, 'href'));
+            setAttr(child, 'href', href);
+            const m = String(href || '').match(/https?:\/\/(?:dx\.)?doi\.org\/(10\.[^\s<>"']+)/i);
+            if (m && (!child.children || child.children.length === 0)) {
+              child.children = [{ type: 'text', value: m[1] }];
+            }
+            continue;
+          }
+          linkifyInElement(child);
+        }
+      }
+      // Deduplicate adjacent identical anchors
+      for (let i = 1; i < el.children.length; i++) {
+        const prev = el.children[i - 1];
+        const curr = el.children[i];
+        if (isElement(prev) && isElement(curr) && prev.tagName === 'a' && curr.tagName === 'a') {
+          const key = `${getAttr(prev, 'href') || ''}|${(prev.children?.[0]?.value) || ''}`;
+          const key2 = `${getAttr(curr, 'href') || ''}|${(curr.children?.[0]?.value) || ''}`;
+          if (key === key2) {
+            el.children.splice(i, 1);
+            i--;
+          }
+        }
+      }
+    };
+
+    // Find references container and normalize its list
+    const findReferencesRoot = () => {
+      let found = null;
+      walk(tree, null, (node) => {
+        if (found) return;
+        if (!isElement(node)) return;
+        const id = getAttr(node, 'id');
+        if (id === 'references' || hasClass(node, 'references') || hasClass(node, 'bibliography')) {
+          found = node;
+        }
+      });
+      return found;
+    };
+
+    const toOrderedList = (container) => {
+      // If there is already an <ol>, use it; otherwise convert common structures
+      let ol = getChildren(container).find((c) => isElement(c) && c.tagName === 'ol');
+      if (!ol) {
+        ol = { type: 'element', tagName: 'ol', properties: { className: ['references'] }, children: [] };
+        const candidates = getChildren(container).filter((n) => isElement(n));
+        if (candidates.length) {
+          for (const node of candidates) {
+            if (hasClass(node, 'csl-entry') || node.tagName === 'li' || node.tagName === 'p' || node.tagName === 'div') {
+              const li = { type: 'element', tagName: 'li', properties: {}, children: getChildren(node) };
+              if (getAttr(node, 'id')) setAttr(li, 'id', getAttr(node, 'id'));
+              ol.children.push(li);
+            }
+          }
+        }
+        // Replace container children by the new ol
+        container.children = [ol];
+      }
+      if (!hasClass(ol, 'references')) {
+        const cls = ensureArray(ol.properties?.className).map(String);
+        if (!cls.includes('references')) cls.push('references');
+        ol.properties = ol.properties || {};
+        ol.properties.className = cls;
+      }
+      return ol;
+    };
+
+    const refsRoot = findReferencesRoot();
+    let refsOl = null;
+    const refIdSet = new Set();
+    const refIdToExternalHref = new Map();
+
+    if (refsRoot) {
+      refsOl = toOrderedList(refsRoot);
+      // Collect item ids and linkify their content
+      for (const li of getChildren(refsOl)) {
+        if (!isElement(li) || li.tagName !== 'li') continue;
+        if (!getAttr(li, 'id')) {
+          // Try to find a nested element with id to promote
+          const nestedWithId = getChildren(li).find((n) => isElement(n) && getAttr(n, 'id'));
+          if (nestedWithId) setAttr(li, 'id', getAttr(nestedWithId, 'id'));
+        }
+        const id = getAttr(li, 'id');
+        if (id) refIdSet.add(String(id));
+        linkifyInElement(li);
+        // Record first external link href (e.g., DOI/URL) if present
+        if (id) {
+          let externalHref = null;
+          const stack = [li];
+          while (stack.length) {
+            const cur = stack.pop();
+            const kids = getChildren(cur);
+            for (const k of kids) {
+              if (isElement(k) && k.tagName === 'a') {
+                const href = String(getAttr(k, 'href') || '');
+                if (/^https?:\/\//i.test(href)) {
+                  externalHref = href;
+                  break;
+                }
+              }
+              if (isElement(k)) stack.push(k);
+            }
+            if (externalHref) break;
+          }
+          if (externalHref) refIdToExternalHref.set(String(id), externalHref);
+        }
+      }
+      setAttr(refsRoot, 'data-built-refs', '1');
+    }
+
+    // Collect in-text anchors that point to references ids
+    const { idToBacklinks: refIdToBacklinks, idToAnchorNodes: refIdToCitationAnchors } = collectBacklinksForIdSet(refIdSet, 'refctx');
+
+    // Append backlinks into references list items
+    appendBackrefsBlock(refsOl, refIdToBacklinks, 'Back to citation');
+
+    // Rewrite in-text citation anchors to external link when available
+    if (refIdToCitationAnchors.size > 0) {
+      for (const [id, anchors] of refIdToCitationAnchors.entries()) {
+        const ext = refIdToExternalHref.get(id);
+        if (!ext) continue;
+        for (const a of anchors) {
+          setAttr(a, 'data-ref-id', id);
+          setAttr(a, 'href', ext);
+          const existingTarget = getAttr(a, 'target');
+          if (!existingTarget) setAttr(a, 'target', '_blank');
+          const rel = String(getAttr(a, 'rel') || '');
+          const relSet = new Set(rel ? rel.split(/\s+/) : []);
+          relSet.add('noopener');
+          relSet.add('noreferrer');
+          setAttr(a, 'rel', Array.from(relSet).join(' '));
+        }
+      }
+    }
+
+    // Footnotes cleanup + backrefs harmonized with references
+    const cleanupFootnotes = () => {
+      let root = null;
+      walk(tree, null, (node) => {
+        if (!isElement(node)) return;
+        if (hasClass(node, 'footnotes')) root = node;
+      });
+      if (!root) return { root: null, ol: null, idSet: new Set() };
+      // Remove <hr> direct children
+      root.children = getChildren(root).filter((n) => !(isElement(n) && n.tagName === 'hr'));
+      // Ensure an <ol>
+      let ol = getChildren(root).find((c) => isElement(c) && c.tagName === 'ol');
+      if (!ol) {
+        ol = { type: 'element', tagName: 'ol', properties: {}, children: [] };
+        const items = getChildren(root).filter((n) => isElement(n) && (n.tagName === 'li' || hasClass(n, 'footnote') || n.tagName === 'p' || n.tagName === 'div'));
+        if (items.length) {
+          for (const it of items) {
+            const li = { type: 'element', tagName: 'li', properties: {}, children: getChildren(it) };
+            // Promote nested id if present (e.g., <p id="fn-1">)
+            const nestedWithId = getChildren(it).find((n) => isElement(n) && getAttr(n, 'id'));
+            if (nestedWithId) setAttr(li, 'id', getAttr(nestedWithId, 'id'));
+            ol.children.push(li);
+          }
+        }
+        root.children = [ol];
+      }
+      // For existing structures, try to promote ids from children when missing
+      for (const li of getChildren(ol)) {
+        if (!isElement(li) || li.tagName !== 'li') continue;
+        if (!getAttr(li, 'id')) {
+          const nestedWithId = getChildren(li).find((n) => isElement(n) && getAttr(n, 'id'));
+          if (nestedWithId) setAttr(li, 'id', getAttr(nestedWithId, 'id'));
+        }
+        // Remove default footnote backrefs anywhere inside (to avoid duplication)
+        removeFootnoteBackrefAnchors(li);
+      }
+      setAttr(root, 'data-built-footnotes', '1');
+      // Collect id set
+      const idSet = new Set();
+      for (const li of getChildren(ol)) {
+        if (!isElement(li) || li.tagName !== 'li') continue;
+        const id = getAttr(li, 'id');
+        if (id) idSet.add(String(id));
+      }
+      return { root, ol, idSet };
+    };
+
+    const { root: footRoot, ol: footOl, idSet: footIdSet } = cleanupFootnotes();
+
+    // Collect in-text anchors pointing to footnotes
+    const { idToBacklinks: footIdToBacklinks } = collectBacklinksForIdSet(footIdSet, 'footctx');
+
+    // Append backlinks into footnote list items (identical pattern to references)
+    appendBackrefsBlock(footOl, footIdToBacklinks, 'Back to footnote call');
+  };
+}
+
+
app/plugins/rehype/restore-at-in-code.mjs
ADDED
@@ -0,0 +1,22 @@
// Rehype plugin to restore '@' inside code nodes after rehype-citation ran
export default function rehypeRestoreAtInCode() {
  return (tree) => {
    const restoreInNode = (node) => {
      if (!node || typeof node !== 'object') return;
      const isText = node.type === 'text';
      if (isText && typeof node.value === 'string' && node.value.includes('__AT_SENTINEL__')) {
        node.value = node.value.replace(/__AT_SENTINEL__/g, '@');
      }
      const isCodeEl = node.type === 'element' && node.tagName === 'code';
      const children = Array.isArray(node.children) ? node.children : [];
      if (isCodeEl && children.length) {
        children.forEach(restoreInNode);
        return;
      }
      children.forEach(restoreInNode);
    };
    restoreInNode(tree);
  };
}
app/plugins/rehype/wrap-outputs.mjs
ADDED
@@ -0,0 +1,38 @@
// Wrap plain-text content inside <section class="code-outputs"> into a <pre>
export default function rehypeWrapOutput() {
  return (tree) => {
    const isWhitespace = (value) => typeof value === 'string' && !/\S/.test(value);
    const extractText = (node) => {
      if (!node) return '';
      if (node.type === 'text') return String(node.value || '');
      const kids = Array.isArray(node.children) ? node.children : [];
      return kids.map(extractText).join('');
    };
    const visit = (node) => {
      if (!node || typeof node !== 'object') return;
      const children = Array.isArray(node.children) ? node.children : [];
      if (node.type === 'element' && node.tagName === 'section') {
        const className = node.properties?.className || [];
        const classes = Array.isArray(className) ? className : [className].filter(Boolean);
        if (classes.includes('code-output')) {
          const meaningful = children.filter((c) => !(c.type === 'text' && isWhitespace(c.value)));
          if (meaningful.length === 1) {
            const only = meaningful[0];
            const isPlainParagraph = only.type === 'element' && only.tagName === 'p' && (only.children || []).every((c) => c.type === 'text');
            const isPlainText = only.type === 'text';
            if (isPlainParagraph || isPlainText) {
              const text = isPlainText ? String(only.value || '') : extractText(only);
              node.children = [
                { type: 'element', tagName: 'pre', properties: {}, children: [ { type: 'text', value: text } ] }
              ];
            }
          }
        }
      }
      children.forEach(visit);
    };
    visit(tree);
  };
}
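
A minimal sketch of the transformation, assuming the usual unified wiring; the file name and standalone pipeline below are illustrative, and the real registration is expected to live in the Astro config rather than a script like this. Note that the matcher keys on the `code-output` class, which is what the plugin actually checks:

// demo-wrap-output.mjs (hypothetical, illustration only)
import { unified } from 'unified';
import rehypeParse from 'rehype-parse';
import rehypeStringify from 'rehype-stringify';
import rehypeWrapOutput from './app/plugins/rehype/wrap-outputs.mjs';

const html = '<section class="code-output">$ npm run build</section>';
const out = await unified()
  .use(rehypeParse, { fragment: true })  // parse as a fragment, not a full document
  .use(rehypeWrapOutput)                 // plain text child gets wrapped in <pre>
  .use(rehypeStringify)
  .process(html);

console.log(String(out));
// Expected shape: <section class="code-output"><pre>$ npm run build</pre></section>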
app/plugins/rehype/wrap-tables.mjs
ADDED
@@ -0,0 +1,43 @@
// rehype plugin: wrap bare <table> elements in a <div class="table-scroll"> container
// so that tables stay width:100% while enabling horizontal scroll when content overflows

export default function rehypeWrapTables() {
  return (tree) => {
    const isElement = (n) => n && typeof n === 'object' && n.type === 'element';
    const getChildren = (n) => (Array.isArray(n?.children) ? n.children : []);

    const walk = (node, parent, fn) => {
      if (!node || typeof node !== 'object') return;
      fn && fn(node, parent);
      const kids = getChildren(node);
      for (const child of kids) walk(child, node, fn);
    };

    const ensureArray = (v) => (Array.isArray(v) ? v : v != null ? [v] : []);
    const hasClass = (el, name) => ensureArray(el?.properties?.className).map(String).includes(name);

    const wrapTable = (tableNode, parent) => {
      if (!parent || !Array.isArray(parent.children)) return;
      // Don't double-wrap if already inside .table-scroll
      if (parent.tagName === 'div' && hasClass(parent, 'table-scroll')) return;

      const wrapper = {
        type: 'element',
        tagName: 'div',
        properties: { className: ['table-scroll'] },
        children: [tableNode]
      };

      const idx = parent.children.indexOf(tableNode);
      if (idx >= 0) parent.children.splice(idx, 1, wrapper);
    };

    walk(tree, null, (node, parent) => {
      if (!isElement(node)) return;
      if (node.tagName !== 'table') return;
      wrapTable(node, parent);
    });
  };
}
app/plugins/remark/ignore-citations-in-code.mjs
ADDED
@@ -0,0 +1,21 @@
// Remark plugin to ignore citations inside code (block and inline)
export default function remarkIgnoreCitationsInCode() {
  return (tree) => {
    const visit = (node) => {
      if (!node || typeof node !== 'object') return;
      const type = node.type;
      if (type === 'code' || type === 'inlineCode') {
        if (typeof node.value === 'string' && node.value.includes('@')) {
          // Use a sentinel to avoid rehype-citation, will be restored later in rehype
          node.value = node.value.replace(/@/g, '__AT_SENTINEL__');
        }
        return; // do not traverse into code
      }
      const children = Array.isArray(node.children) ? node.children : [];
      children.forEach(visit);
    };
    visit(tree);
  };
}
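
These two plugins appear designed to bracket the citation step: the remark half above hides '@' inside code behind the __AT_SENTINEL__ marker before citations are resolved, and the rehype half (restore-at-in-code.mjs, earlier in this diff) puts the '@' back afterwards. A minimal ordering sketch, assuming a plain unified pipeline; the citation plugin itself is left as a placeholder since its registration is not shown in this section:

// demo-citation-sentinel.mjs (hypothetical ordering sketch)
import { unified } from 'unified';
import remarkParse from 'remark-parse';
import remarkRehype from 'remark-rehype';
import rehypeStringify from 'rehype-stringify';
import remarkIgnoreCitationsInCode from './app/plugins/remark/ignore-citations-in-code.mjs';
import rehypeRestoreAtInCode from './app/plugins/rehype/restore-at-in-code.mjs';

const md = 'See @smith2020, but not inside code: `user@example.com`';
const out = await unified()
  .use(remarkParse)
  .use(remarkIgnoreCitationsInCode)  // 1. '@' in code/inlineCode -> __AT_SENTINEL__
  .use(remarkRehype)
  // 2. the citation plugin (e.g. rehype-citation) would run here and only sees the prose '@'
  .use(rehypeRestoreAtInCode)        // 3. sentinel -> '@' restored inside <code>
  .use(rehypeStringify)
  .process(md);

console.log(String(out)); // the inline code still renders as user@example.com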
app/plugins/remark/output-container.mjs
ADDED
@@ -0,0 +1,23 @@
// Transform `:::output ... :::` into a <section class="code-output"> wrapper
// Requires remark-directive to be applied before this plugin

export default function remarkOutputContainer() {
  return (tree) => {
    const visit = (node) => {
      if (!node || typeof node !== 'object') return;

      if (node.type === 'containerDirective' && node.name === 'output') {
        node.data = node.data || {};
        node.data.hName = 'section';
        node.data.hProperties = { className: ['code-output'] };
      }

      const children = Array.isArray(node.children) ? node.children : [];
      for (const child of children) visit(child);
    };

    visit(tree);
  };
}
app/plugins/remark/outputs-container.mjs
ADDED
@@ -0,0 +1,23 @@
// Transform `:::outputs ... :::` into a <section class="code-outputs"> wrapper
// Requires remark-directive to be applied before this plugin

export default function remarkOutputsContainer() {
  return (tree) => {
    const visit = (node) => {
      if (!node || typeof node !== 'object') return;

      if (node.type === 'containerDirective' && node.name === 'outputs') {
        node.data = node.data || {};
        node.data.hName = 'section';
        node.data.hProperties = { className: ['code-outputs'] };
      }

      const children = Array.isArray(node.children) ? node.children : [];
      for (const child of children) visit(child);
    };

    visit(tree);
  };
}
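
Both container plugins rely on remark-directive having parsed the `:::` fences first, as their header comments note. A minimal ordering sketch; the standalone pipeline and file name below are illustrative, since the real registration is expected to live in the Astro config rather than a script like this:

// demo-output-containers.mjs (hypothetical ordering sketch)
import { unified } from 'unified';
import remarkParse from 'remark-parse';
import remarkDirective from 'remark-directive';
import remarkRehype from 'remark-rehype';
import rehypeStringify from 'rehype-stringify';
import remarkOutputContainer from './app/plugins/remark/output-container.mjs';
import remarkOutputsContainer from './app/plugins/remark/outputs-container.mjs';

const md = [':::output', 'Hello from the build log', ':::'].join('\n');
const out = await unified()
  .use(remarkParse)
  .use(remarkDirective)         // parses ::: container syntax into directive nodes
  .use(remarkOutputContainer)   // :::output  -> <section class="code-output">
  .use(remarkOutputsContainer)  // :::outputs -> <section class="code-outputs">
  .use(remarkRehype)
  .use(rehypeStringify)
  .process(md);

console.log(String(out));
// Expected shape: <section class="code-output"><p>Hello from the build log</p></section>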
app/postcss.config.mjs
ADDED
@@ -0,0 +1,14 @@
// PostCSS config enabling Custom Media Queries
// Allows usage of: @media (--bp-content-collapse) { ... }

import postcssCustomMedia from 'postcss-custom-media';
import postcssPresetEnv from 'postcss-preset-env';

export default {
  plugins: [
    postcssCustomMedia(),
    postcssPresetEnv({
      stage: 0
    })
  ]
};
app/public/data
ADDED
@@ -0,0 +1 @@
../src/content/assets/data
app/public/scripts/color-palettes.js
ADDED
@@ -0,0 +1,274 @@
| 1 |
+
// Global color palettes generator and watcher
|
| 2 |
+
// - Observes CSS variable --primary-color and theme changes
|
| 3 |
+
// - Generates categorical, sequential, and diverging palettes (OKLCH/OKLab)
|
| 4 |
+
// - Exposes results as CSS variables on :root
|
| 5 |
+
// - Supports variable color counts per palette via CSS vars
|
| 6 |
+
// - Dispatches a 'palettes:updated' CustomEvent after each update
|
| 7 |
+
|
| 8 |
+
(() => {
|
| 9 |
+
const MODE = { cssRoot: document.documentElement };
|
| 10 |
+
|
| 11 |
+
const getCssVar = (name) => {
|
| 12 |
+
try { return getComputedStyle(MODE.cssRoot).getPropertyValue(name).trim(); } catch { return ''; }
|
| 13 |
+
};
|
| 14 |
+
const getIntFromCssVar = (name, fallback) => {
|
| 15 |
+
const raw = getCssVar(name);
|
| 16 |
+
if (!raw) return fallback;
|
| 17 |
+
const v = parseInt(String(raw), 10);
|
| 18 |
+
if (Number.isNaN(v)) return fallback;
|
| 19 |
+
return v;
|
| 20 |
+
};
|
| 21 |
+
const clamp = (n, min, max) => Math.max(min, Math.min(max, n));
|
| 22 |
+
|
| 23 |
+
// Color math (OKLab/OKLCH)
|
| 24 |
+
const srgbToLinear = (u) => (u <= 0.04045 ? u / 12.92 : Math.pow((u + 0.055) / 1.055, 2.4));
|
| 25 |
+
const linearToSrgb = (u) => (u <= 0.0031308 ? 12.92 * u : 1.055 * Math.pow(Math.max(0, u), 1 / 2.4) - 0.055);
|
| 26 |
+
const rgbToOklab = (r, g, b) => {
|
| 27 |
+
const rl = srgbToLinear(r), gl = srgbToLinear(g), bl = srgbToLinear(b);
|
| 28 |
+
const l = Math.cbrt(0.4122214708 * rl + 0.5363325363 * gl + 0.0514459929 * bl);
|
| 29 |
+
const m = Math.cbrt(0.2119034982 * rl + 0.6806995451 * gl + 0.1073969566 * bl);
|
| 30 |
+
const s = Math.cbrt(0.0883024619 * rl + 0.2817188376 * gl + 0.6299787005 * bl);
|
| 31 |
+
const L = 0.2104542553 * l + 0.7936177850 * m - 0.0040720468 * s;
|
| 32 |
+
const a = 1.9779984951 * l - 2.4285922050 * m + 0.4505937099 * s;
|
| 33 |
+
const b2 = 0.0259040371 * l + 0.7827717662 * m - 0.8086757660 * s;
|
| 34 |
+
return { L, a, b: b2 };
|
| 35 |
+
};
|
| 36 |
+
const oklabToRgb = (L, a, b) => {
|
| 37 |
+
const l_ = L + 0.3963377774 * a + 0.2158037573 * b;
|
| 38 |
+
const m_ = L - 0.1055613458 * a - 0.0638541728 * b;
|
| 39 |
+
const s_ = L - 0.0894841775 * a - 1.2914855480 * b;
|
| 40 |
+
const l = l_ * l_ * l_;
|
| 41 |
+
const m = m_ * m_ * m_;
|
| 42 |
+
const s = s_ * s_ * s_;
|
| 43 |
+
const r = linearToSrgb(+4.0767416621 * l - 3.3077115913 * m + 0.2309699292 * s);
|
| 44 |
+
const g = linearToSrgb(-1.2684380046 * l + 2.6097574011 * m - 0.3413193965 * s);
|
| 45 |
+
const b3 = linearToSrgb(-0.0041960863 * l - 0.7034186147 * m + 1.7076147010 * s);
|
| 46 |
+
return { r, g, b: b3 };
|
| 47 |
+
};
|
| 48 |
+
const oklchToOklab = (L, C, hDeg) => { const h = (hDeg * Math.PI) / 180; return { L, a: C * Math.cos(h), b: C * Math.sin(h) }; };
|
| 49 |
+
const oklabToOklch = (L, a, b) => { const C = Math.sqrt(a * a + b * b); let h = Math.atan2(b, a) * 180 / Math.PI; if (h < 0) h += 360; return { L, C, h }; };
|
| 50 |
+
const clamp01 = (x) => Math.min(1, Math.max(0, x));
|
| 51 |
+
const isInGamut = ({ r, g, b }) => r >= 0 && r <= 1 && g >= 0 && g <= 1 && b >= 0 && b <= 1;
|
| 52 |
+
const toHex = ({ r, g, b }) => {
|
| 53 |
+
const R = Math.round(clamp01(r) * 255), G = Math.round(clamp01(g) * 255), B = Math.round(clamp01(b) * 255);
|
| 54 |
+
const h = (n) => n.toString(16).padStart(2, '0');
|
| 55 |
+
return `#${h(R)}${h(G)}${h(B)}`.toUpperCase();
|
| 56 |
+
};
|
| 57 |
+
const oklchToHexSafe = (L, C, h) => { let c = C; for (let i = 0; i < 12; i++) { const { a, b } = oklchToOklab(L, c, h); const rgb = oklabToRgb(L, a, b); if (isInGamut(rgb)) return toHex(rgb); c = Math.max(0, c - 0.02); } return toHex(oklabToRgb(L, 0, 0)); };
|
| 58 |
+
const parseCssColorToRgb = (css) => { try { const el = document.createElement('span'); el.style.color = css; document.body.appendChild(el); const cs = getComputedStyle(el).color; document.body.removeChild(el); const m = cs.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/i); if (!m) return null; return { r: Number(m[1]) / 255, g: Number(m[2]) / 255, b: Number(m[3]) / 255 }; } catch { return null; } };
|
| 59 |
+
|
| 60 |
+
// Get primary color in OKLCH format to preserve precision
|
| 61 |
+
const getPrimaryOKLCH = () => {
|
| 62 |
+
const css = getCssVar('--primary-color');
|
| 63 |
+
if (!css) return null;
|
| 64 |
+
|
| 65 |
+
// For OKLCH colors, return the exact values without conversion
|
| 66 |
+
if (css.includes('oklch')) {
|
| 67 |
+
const oklchMatch = css.match(/oklch\(([^)]+)\)/);
|
| 68 |
+
if (oklchMatch) {
|
| 69 |
+
const values = oklchMatch[1].split(/\s+/).map(v => parseFloat(v.trim()));
|
| 70 |
+
if (values.length >= 3) {
|
| 71 |
+
const [L, C, h] = values;
|
| 72 |
+
return { L, C, h };
|
| 73 |
+
}
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// For non-OKLCH colors, convert to OKLCH for consistency
|
| 78 |
+
const rgb = parseCssColorToRgb(css);
|
| 79 |
+
if (rgb) {
|
| 80 |
+
const { L, a, b } = rgbToOklab(rgb.r, rgb.g, rgb.b);
|
| 81 |
+
const { C, h } = oklabToOklch(L, a, b);
|
| 82 |
+
return { L, C, h };
|
| 83 |
+
}
|
| 84 |
+
return null;
|
| 85 |
+
};
|
| 86 |
+
|
| 87 |
+
// Keep getPrimaryHex for backward compatibility, but now it converts from OKLCH
|
| 88 |
+
const getPrimaryHex = () => {
|
| 89 |
+
const oklch = getPrimaryOKLCH();
|
| 90 |
+
if (!oklch) return null;
|
| 91 |
+
|
| 92 |
+
const { a, b } = oklchToOklab(oklch.L, oklch.C, oklch.h);
|
| 93 |
+
const rgb = oklabToRgb(oklch.L, a, b);
|
| 94 |
+
return toHex(rgb);
|
| 95 |
+
};
|
| 96 |
+
// No count management via CSS anymore; counts are passed directly to the API
|
| 97 |
+
|
| 98 |
+
const generators = {
|
| 99 |
+
categorical: (baseOKLCH, count) => {
|
| 100 |
+
const { L, C, h } = baseOKLCH;
|
| 101 |
+
const L0 = Math.min(0.85, Math.max(0.4, L));
|
| 102 |
+
const C0 = Math.min(0.35, Math.max(0.1, C || 0.2));
|
| 103 |
+
const total = Math.max(1, Math.min(12, count || 8));
|
| 104 |
+
const hueStep = 360 / total;
|
| 105 |
+
const results = [];
|
| 106 |
+
for (let i = 0; i < total; i++) {
|
| 107 |
+
const hDeg = (h + i * hueStep) % 360;
|
| 108 |
+
const lVar = ((i % 3) - 1) * 0.04;
|
| 109 |
+
results.push(oklchToHexSafe(Math.max(0.4, Math.min(0.85, L0 + lVar)), C0, hDeg));
|
| 110 |
+
}
|
| 111 |
+
return results;
|
| 112 |
+
},
|
| 113 |
+
sequential: (baseOKLCH, count) => {
|
| 114 |
+
const { L, C, h } = baseOKLCH;
|
| 115 |
+
const total = Math.max(1, Math.min(12, count || 8));
|
| 116 |
+
const startL = Math.max(0.25, L - 0.18);
|
| 117 |
+
const endL = Math.min(0.92, L + 0.18);
|
| 118 |
+
const cBase = Math.min(0.33, Math.max(0.08, C * 0.9 + 0.06));
|
| 119 |
+
const out = [];
|
| 120 |
+
for (let i = 0; i < total; i++) {
|
| 121 |
+
const t = total === 1 ? 0 : i / (total - 1);
|
| 122 |
+
const lNow = startL * (1 - t) + endL * t;
|
| 123 |
+
const cNow = cBase * (0.85 + 0.15 * (1 - Math.abs(0.5 - t) * 2));
|
| 124 |
+
out.push(oklchToHexSafe(lNow, cNow, h));
|
| 125 |
+
}
|
| 126 |
+
return out;
|
| 127 |
+
},
|
| 128 |
+
diverging: (baseOKLCH, count) => {
|
| 129 |
+
const { L, C, h } = baseOKLCH;
|
| 130 |
+
const total = Math.max(1, Math.min(12, count || 8));
|
| 131 |
+
|
| 132 |
+
// Left endpoint: EXACT primary color (no darkening)
|
| 133 |
+
const leftLab = oklchToOklab(L, C, h);
|
| 134 |
+
// Right endpoint: complement with same L and similar C (clamped safe)
|
| 135 |
+
const compH = (h + 180) % 360;
|
| 136 |
+
const cSafe = Math.min(0.35, Math.max(0.08, C));
|
| 137 |
+
const rightLab = oklchToOklab(L, cSafe, compH);
|
| 138 |
+
const whiteLab = { L: 0.98, a: 0, b: 0 }; // center near‑white
|
| 139 |
+
|
| 140 |
+
const hexFromOKLab = (L, a, b) => toHex(oklabToRgb(L, a, b));
|
| 141 |
+
const lerp = (a, b, t) => a + (b - a) * t;
|
| 142 |
+
const lerpOKLabHex = (A, B, t) => hexFromOKLab(lerp(A.L, B.L, t), lerp(A.a, B.a, t), lerp(A.b, B.b, t));
|
| 143 |
+
|
| 144 |
+
const out = [];
|
| 145 |
+
if (total % 2 === 1) {
|
| 146 |
+
const nSide = (total - 1) >> 1; // items on each side
|
| 147 |
+
// Left side: include left endpoint exactly at index 0
|
| 148 |
+
for (let i = 0; i < nSide; i++) {
|
| 149 |
+
const t = nSide <= 1 ? 0 : (i / (nSide - 1)); // 0 .. 1
|
| 150 |
+
// Move from leftLab to a value close (but not equal) to white; ensure last before center is lighter
|
| 151 |
+
const tt = t * 0.9; // keep some distance from pure white before center
|
| 152 |
+
out.push(lerpOKLabHex(leftLab, whiteLab, tt));
|
| 153 |
+
}
|
| 154 |
+
// Center
|
| 155 |
+
out.push(hexFromOKLab(whiteLab.L, whiteLab.a, whiteLab.b));
|
| 156 |
+
// Right side: start near white and end EXACTLY at rightLab
|
| 157 |
+
for (let i = 0; i < nSide; i++) {
|
| 158 |
+
const t = nSide <= 1 ? 1 : ((i + 1) / nSide); // (1/n)..1
|
| 159 |
+
const tt = Math.max(0.1, t); // avoid starting at pure white
|
| 160 |
+
out.push(lerpOKLabHex(whiteLab, rightLab, tt));
|
| 161 |
+
}
|
| 162 |
+
// Ensure first and last are exact endpoints
|
| 163 |
+
if (out.length) { out[0] = hexFromOKLab(leftLab.L, leftLab.a, leftLab.b); out[out.length - 1] = hexFromOKLab(rightLab.L, rightLab.a, rightLab.b); }
|
| 164 |
+
} else {
|
| 165 |
+
const nSide = total >> 1;
|
| 166 |
+
// Left half including left endpoint, approaching white but not reaching it
|
| 167 |
+
for (let i = 0; i < nSide; i++) {
|
| 168 |
+
const t = nSide <= 1 ? 0 : (i / (nSide - 1)); // 0 .. 1
|
| 169 |
+
const tt = t * 0.9;
|
| 170 |
+
out.push(lerpOKLabHex(leftLab, whiteLab, tt));
|
| 171 |
+
}
|
| 172 |
+
// Right half: mirror from near white to exact right endpoint
|
| 173 |
+
for (let i = 0; i < nSide; i++) {
|
| 174 |
+
const t = nSide <= 1 ? 1 : ((i + 1) / nSide); // (1/n)..1
|
| 175 |
+
const tt = Math.max(0.1, t);
|
| 176 |
+
out.push(lerpOKLabHex(whiteLab, rightLab, tt));
|
| 177 |
+
}
|
| 178 |
+
if (out.length) { out[0] = hexFromOKLab(leftLab.L, leftLab.a, leftLab.b); out[out.length - 1] = hexFromOKLab(rightLab.L, rightLab.a, rightLab.b); }
|
| 179 |
+
}
|
| 180 |
+
return out;
|
| 181 |
+
}
|
| 182 |
+
};
|
| 183 |
+
|
| 184 |
+
let lastSignature = '';
|
| 185 |
+
|
| 186 |
+
const updatePalettes = () => {
|
| 187 |
+
const primaryOKLCH = getPrimaryOKLCH();
|
| 188 |
+
const primaryHex = getPrimaryHex();
|
| 189 |
+
const signature = `${primaryOKLCH?.L},${primaryOKLCH?.C},${primaryOKLCH?.h}`;
|
| 190 |
+
if (signature === lastSignature) return;
|
| 191 |
+
lastSignature = signature;
|
| 192 |
+
try { document.dispatchEvent(new CustomEvent('palettes:updated', { detail: { primary: primaryHex, primaryOKLCH } })); } catch { }
|
| 193 |
+
};
|
| 194 |
+
|
| 195 |
+
const bootstrap = () => {
|
| 196 |
+
// Initial setup - only run once on page load
|
| 197 |
+
updatePalettes();
|
| 198 |
+
|
| 199 |
+
// Observer will handle all subsequent changes
|
| 200 |
+
const mo = new MutationObserver(() => updatePalettes());
|
| 201 |
+
mo.observe(MODE.cssRoot, { attributes: true, attributeFilter: ['style', 'data-theme'] });
|
| 202 |
+
|
| 203 |
+
// Utility: choose high-contrast (or softened) text style against an arbitrary background color
|
| 204 |
+
const pickTextStyleForBackground = (bgCss, opts = {}) => {
|
| 205 |
+
const cssRoot = document.documentElement;
|
| 206 |
+
const getCssVar = (name) => {
|
| 207 |
+
try { return getComputedStyle(cssRoot).getPropertyValue(name).trim(); } catch { return ''; }
|
| 208 |
+
};
|
| 209 |
+
const resolveCssToRgb01 = (css) => {
|
| 210 |
+
const rgb = parseCssColorToRgb(css);
|
| 211 |
+
if (!rgb) return null;
|
| 212 |
+
return rgb; // already 0..1
|
| 213 |
+
};
|
| 214 |
+
const mixRgb01 = (a, b, t) => ({ r: a.r * (1 - t) + b.r * t, g: a.g * (1 - t) + b.g * t, b: a.b * (1 - t) + b.b * t });
|
| 215 |
+
const relLum = (rgb) => {
|
| 216 |
+
const f = (u) => srgbToLinear(u);
|
| 217 |
+
return 0.2126 * f(rgb.r) + 0.7152 * f(rgb.g) + 0.0722 * f(rgb.b);
|
| 218 |
+
};
|
| 219 |
+
const contrast = (fg, bg) => {
|
| 220 |
+
const L1 = relLum(fg), L2 = relLum(bg); const a = Math.max(L1, L2), b = Math.min(L1, L2);
|
| 221 |
+
return (a + 0.05) / (b + 0.05);
|
| 222 |
+
};
|
| 223 |
+
try {
|
| 224 |
+
const bg = resolveCssToRgb01(bgCss);
|
| 225 |
+
if (!bg) return { fill: getCssVar('--text-color') || '#000', stroke: 'var(--transparent-page-contrast)', strokeWidth: 1 };
|
| 226 |
+
const candidatesCss = [getCssVar('--text-color') || '#111', getCssVar('--on-primary') || '#0f1115', '#000', '#fff'];
|
| 227 |
+
const candidates = candidatesCss
|
| 228 |
+
.map(css => ({ css, rgb: resolveCssToRgb01(css) }))
|
| 229 |
+
.filter(x => !!x.rgb);
|
| 230 |
+
// Pick the max contrast
|
| 231 |
+
let best = candidates[0]; let bestCR = contrast(best.rgb, bg);
|
| 232 |
+
for (let i = 1; i < candidates.length; i++) {
|
| 233 |
+
const cr = contrast(candidates[i].rgb, bg);
|
| 234 |
+
if (cr > bestCR) { best = candidates[i]; bestCR = cr; }
|
| 235 |
+
}
|
| 236 |
+
// Optional softening via blend factor (0..1), blending towards muted color
|
| 237 |
+
const blend = Math.min(1, Math.max(0, Number(opts.blend || 0)));
|
| 238 |
+
let finalRgb = best.rgb;
|
| 239 |
+
if (blend > 0) {
|
| 240 |
+
const mutedCss = getCssVar('--muted-color') || (getCssVar('--text-color') || '#111');
|
| 241 |
+
const mutedRgb = resolveCssToRgb01(mutedCss) || best.rgb;
|
| 242 |
+
finalRgb = mixRgb01(best.rgb, mutedRgb, blend);
|
| 243 |
+
}
|
| 244 |
+
const haloStrength = Math.min(1, Math.max(0, Number(opts.haloStrength == null ? 0.5 : opts.haloStrength)));
|
| 245 |
+
const stroke = (best.css === '#000' || best.css.toLowerCase() === 'black') ? `rgba(255,255,255,${0.30 + 0.40 * haloStrength})` : `rgba(0,0,0,${0.30 + 0.30 * haloStrength})`;
|
| 246 |
+
return { fill: toHex(finalRgb), stroke, strokeWidth: (opts.haloWidth == null ? 1 : Number(opts.haloWidth)) };
|
| 247 |
+
} catch {
|
| 248 |
+
return { fill: getCssVar('--text-color') || '#000', stroke: 'var(--transparent-page-contrast)', strokeWidth: 1 };
|
| 249 |
+
}
|
| 250 |
+
};
|
| 251 |
+
window.ColorPalettes = {
|
| 252 |
+
refresh: updatePalettes,
|
| 253 |
+
notify: () => { try { const primaryOKLCH = getPrimaryOKLCH(); const primaryHex = getPrimaryHex(); document.dispatchEvent(new CustomEvent('palettes:updated', { detail: { primary: primaryHex, primaryOKLCH } })); } catch { } },
|
| 254 |
+
getPrimary: () => getPrimaryHex(),
|
| 255 |
+
getPrimaryOKLCH: () => getPrimaryOKLCH(),
|
| 256 |
+
getColors: (key, count = 6) => {
|
| 257 |
+
const primaryOKLCH = getPrimaryOKLCH();
|
| 258 |
+
if (!primaryOKLCH) return [];
|
| 259 |
+
const total = Math.max(1, Math.min(12, Number(count) || 6));
|
| 260 |
+
if (key === 'categorical') return generators.categorical(primaryOKLCH, total);
|
| 261 |
+
if (key === 'sequential') return generators.sequential(primaryOKLCH, total);
|
| 262 |
+
if (key === 'diverging') return generators.diverging(primaryOKLCH, total);
|
| 263 |
+
return [];
|
| 264 |
+
},
|
| 265 |
+
getTextStyleForBackground: (bgCss, opts) => pickTextStyleForBackground(bgCss, opts || {}),
|
| 266 |
+
chooseReadableText: (bgCss, opts) => pickTextStyleForBackground(bgCss, opts || {})
|
| 267 |
+
};
|
| 268 |
+
};
|
| 269 |
+
|
| 270 |
+
if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
|
| 271 |
+
else bootstrap();
|
| 272 |
+
})();
|
| 273 |
+
|
| 274 |
+
|
app/scripts/export-latex.mjs
ADDED
@@ -0,0 +1,318 @@
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
import { spawn } from 'node:child_process';
|
| 3 |
+
import { promises as fs } from 'node:fs';
|
| 4 |
+
import { resolve, dirname, basename, extname } from 'node:path';
|
| 5 |
+
import process from 'node:process';
|
| 6 |
+
|
| 7 |
+
async function run(command, args = [], options = {}) {
|
| 8 |
+
return new Promise((resolvePromise, reject) => {
|
| 9 |
+
const child = spawn(command, args, { stdio: 'inherit', shell: false, ...options });
|
| 10 |
+
child.on('error', reject);
|
| 11 |
+
child.on('exit', (code) => {
|
| 12 |
+
if (code === 0) resolvePromise(undefined);
|
| 13 |
+
else reject(new Error(`${command} ${args.join(' ')} exited with code ${code}`));
|
| 14 |
+
});
|
| 15 |
+
});
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
function parseArgs(argv) {
|
| 19 |
+
const out = {};
|
| 20 |
+
for (const arg of argv.slice(2)) {
|
| 21 |
+
if (!arg.startsWith('--')) continue;
|
| 22 |
+
const [k, v] = arg.replace(/^--/, '').split('=');
|
| 23 |
+
out[k] = v === undefined ? true : v;
|
| 24 |
+
}
|
| 25 |
+
return out;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
function slugify(text) {
|
| 29 |
+
return String(text || '')
|
| 30 |
+
.normalize('NFKD')
|
| 31 |
+
.replace(/\p{Diacritic}+/gu, '')
|
| 32 |
+
.toLowerCase()
|
| 33 |
+
.replace(/[^a-z0-9]+/g, '-')
|
| 34 |
+
.replace(/^-+|-+$/g, '')
|
| 35 |
+
.slice(0, 120) || 'article';
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
async function checkPandocInstalled() {
|
| 39 |
+
try {
|
| 40 |
+
await run('pandoc', ['--version'], { stdio: 'pipe' });
|
| 41 |
+
return true;
|
| 42 |
+
} catch {
|
| 43 |
+
return false;
|
| 44 |
+
}
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
async function readMdxFile(filePath) {
|
| 48 |
+
try {
|
| 49 |
+
const content = await fs.readFile(filePath, 'utf-8');
|
| 50 |
+
return content;
|
| 51 |
+
} catch (error) {
|
| 52 |
+
console.warn(`Warning: Could not read ${filePath}:`, error.message);
|
| 53 |
+
return '';
|
| 54 |
+
}
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
function extractFrontmatter(content) {
|
| 58 |
+
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n/);
|
| 59 |
+
if (!frontmatterMatch) return { frontmatter: {}, content };
|
| 60 |
+
|
| 61 |
+
const frontmatterText = frontmatterMatch[1];
|
| 62 |
+
const contentWithoutFrontmatter = content.replace(frontmatterMatch[0], '');
|
| 63 |
+
|
| 64 |
+
// Simple YAML parsing for basic fields
|
| 65 |
+
const frontmatter = {};
|
| 66 |
+
const lines = frontmatterText.split('\n');
|
| 67 |
+
let currentKey = null;
|
| 68 |
+
let currentValue = '';
|
| 69 |
+
|
| 70 |
+
for (const line of lines) {
|
| 71 |
+
const trimmed = line.trim();
|
| 72 |
+
if (trimmed.includes(':') && !trimmed.startsWith('-')) {
|
| 73 |
+
if (currentKey) {
|
| 74 |
+
frontmatter[currentKey] = currentValue.trim();
|
| 75 |
+
}
|
| 76 |
+
const [key, ...valueParts] = trimmed.split(':');
|
| 77 |
+
currentKey = key.trim();
|
| 78 |
+
currentValue = valueParts.join(':').trim();
|
| 79 |
+
} else if (currentKey) {
|
| 80 |
+
currentValue += '\n' + trimmed;
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
if (currentKey) {
|
| 85 |
+
frontmatter[currentKey] = currentValue.trim();
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
return { frontmatter, content: contentWithoutFrontmatter };
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
function cleanMdxToMarkdown(content) {
|
| 92 |
+
// Remove import statements
|
| 93 |
+
content = content.replace(/^import .+?;?\s*$/gm, '');
|
| 94 |
+
|
| 95 |
+
// Remove JSX component calls like <ComponentName />
|
| 96 |
+
content = content.replace(/<[A-Z][a-zA-Z0-9]*\s*\/>/g, '');
|
| 97 |
+
|
| 98 |
+
// Convert JSX components to simpler markdown
|
| 99 |
+
// Handle Sidenote components specially
|
| 100 |
+
content = content.replace(/<Sidenote>([\s\S]*?)<\/Sidenote>/g, (match, innerContent) => {
|
| 101 |
+
// Extract main content and aside content
|
| 102 |
+
const asideMatch = innerContent.match(/<Fragment slot="aside">([\s\S]*?)<\/Fragment>/);
|
| 103 |
+
const mainContent = innerContent.replace(/<Fragment slot="aside">[\s\S]*?<\/Fragment>/, '').trim();
|
| 104 |
+
const asideContent = asideMatch ? asideMatch[1].trim() : '';
|
| 105 |
+
|
| 106 |
+
let result = mainContent;
|
| 107 |
+
if (asideContent) {
|
| 108 |
+
result += `\n\n> **Note:** ${asideContent}`;
|
| 109 |
+
}
|
| 110 |
+
return result;
|
| 111 |
+
});
|
| 112 |
+
|
| 113 |
+
// Handle Note components
|
| 114 |
+
content = content.replace(/<Note[^>]*>([\s\S]*?)<\/Note>/g, (match, innerContent) => {
|
| 115 |
+
return `\n> **Note:** ${innerContent.trim()}\n`;
|
| 116 |
+
});
|
| 117 |
+
|
| 118 |
+
// Handle Wide and FullWidth components
|
| 119 |
+
content = content.replace(/<(Wide|FullWidth)>([\s\S]*?)<\/\1>/g, '$2');
|
| 120 |
+
|
| 121 |
+
// Handle HtmlEmbed components (convert to simple text)
|
| 122 |
+
content = content.replace(/<HtmlEmbed[^>]*\/>/g, '*[Interactive content not available in LaTeX]*');
|
| 123 |
+
|
| 124 |
+
// Remove remaining JSX fragments
|
| 125 |
+
content = content.replace(/<Fragment[^>]*>([\s\S]*?)<\/Fragment>/g, '$1');
|
| 126 |
+
content = content.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>([\s\S]*?)<\/[A-Z][a-zA-Z0-9]*>/g, '$1');
|
| 127 |
+
|
| 128 |
+
// Clean up className attributes
|
| 129 |
+
content = content.replace(/className="[^"]*"/g, '');
|
| 130 |
+
|
| 131 |
+
// Clean up extra whitespace
|
| 132 |
+
content = content.replace(/\n{3,}/g, '\n\n');
|
| 133 |
+
|
| 134 |
+
return content.trim();
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
async function processChapterImports(content, contentDir) {
|
| 138 |
+
let processedContent = content;
|
| 139 |
+
|
| 140 |
+
// First, extract all import statements and their corresponding component calls
|
| 141 |
+
const importPattern = /import\s+(\w+)\s+from\s+["']\.\/chapters\/([^"']+)["'];?/g;
|
| 142 |
+
const imports = new Map();
|
| 143 |
+
let match;
|
| 144 |
+
|
| 145 |
+
// Collect all imports
|
| 146 |
+
while ((match = importPattern.exec(content)) !== null) {
|
| 147 |
+
const [fullImport, componentName, chapterPath] = match;
|
| 148 |
+
imports.set(componentName, { path: chapterPath, importStatement: fullImport });
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
// Remove all import statements
|
| 152 |
+
processedContent = processedContent.replace(importPattern, '');
|
| 153 |
+
|
| 154 |
+
// Process each component call
|
| 155 |
+
for (const [componentName, { path: chapterPath }] of imports) {
|
| 156 |
+
const componentCallPattern = new RegExp(`<${componentName}\\s*\\/>`, 'g');
|
| 157 |
+
|
| 158 |
+
try {
|
| 159 |
+
const chapterFile = resolve(contentDir, 'chapters', chapterPath);
|
| 160 |
+
const chapterContent = await readMdxFile(chapterFile);
|
| 161 |
+
const { content: chapterMarkdown } = extractFrontmatter(chapterContent);
|
| 162 |
+
const cleanChapter = cleanMdxToMarkdown(chapterMarkdown);
|
| 163 |
+
|
| 164 |
+
processedContent = processedContent.replace(componentCallPattern, cleanChapter);
|
| 165 |
+
console.log(`✅ Processed chapter: ${chapterPath}`);
|
| 166 |
+
} catch (error) {
|
| 167 |
+
console.warn(`Warning: Could not process chapter ${chapterPath}:`, error.message);
|
| 168 |
+
processedContent = processedContent.replace(componentCallPattern, `\n*[Chapter ${chapterPath} could not be loaded]*\n`);
|
| 169 |
+
}
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
return processedContent;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
function createLatexPreamble(frontmatter) {
|
| 176 |
+
const title = frontmatter.title ? frontmatter.title.replace(/\n/g, ' ') : 'Untitled Article';
|
| 177 |
+
const subtitle = frontmatter.subtitle || '';
|
| 178 |
+
const authors = frontmatter.authors || '';
|
| 179 |
+
const date = frontmatter.published || '';
|
| 180 |
+
|
| 181 |
+
return `\\documentclass[11pt,a4paper]{article}
|
| 182 |
+
\\usepackage[utf8]{inputenc}
|
| 183 |
+
\\usepackage[T1]{fontenc}
|
| 184 |
+
\\usepackage{amsmath,amsfonts,amssymb}
|
| 185 |
+
\\usepackage{graphicx}
|
| 186 |
+
\\usepackage{hyperref}
|
| 187 |
+
\\usepackage{booktabs}
|
| 188 |
+
\\usepackage{longtable}
|
| 189 |
+
\\usepackage{array}
|
| 190 |
+
\\usepackage{multirow}
|
| 191 |
+
\\usepackage{wrapfig}
|
| 192 |
+
\\usepackage{float}
|
| 193 |
+
\\usepackage{colortbl}
|
| 194 |
+
\\usepackage{pdflscape}
|
| 195 |
+
\\usepackage{tabu}
|
| 196 |
+
\\usepackage{threeparttable}
|
| 197 |
+
\\usepackage{threeparttablex}
|
| 198 |
+
\\usepackage{ulem}
|
| 199 |
+
\\usepackage{makecell}
|
| 200 |
+
\\usepackage{xcolor}
|
| 201 |
+
\\usepackage{listings}
|
| 202 |
+
\\usepackage{fancyvrb}
|
| 203 |
+
\\usepackage{geometry}
|
| 204 |
+
\\geometry{margin=1in}
|
| 205 |
+
|
| 206 |
+
\\title{${title}${subtitle ? `\\\\\\large ${subtitle}` : ''}}
|
| 207 |
+
${authors ? `\\author{${authors}}` : ''}
|
| 208 |
+
${date ? `\\date{${date}}` : ''}
|
| 209 |
+
|
| 210 |
+
\\begin{document}
|
| 211 |
+
\\maketitle
|
| 212 |
+
\\tableofcontents
|
| 213 |
+
\\newpage
|
| 214 |
+
|
| 215 |
+
`;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
async function main() {
|
| 219 |
+
const cwd = process.cwd();
|
| 220 |
+
const args = parseArgs(process.argv);
|
| 221 |
+
|
| 222 |
+
// Check if pandoc is installed
|
| 223 |
+
const hasPandoc = await checkPandocInstalled();
|
| 224 |
+
if (!hasPandoc) {
|
| 225 |
+
console.error('❌ Pandoc is not installed. Please install it first:');
|
| 226 |
+
console.error(' macOS: brew install pandoc');
|
| 227 |
+
console.error(' Ubuntu: apt-get install pandoc');
|
| 228 |
+
console.error(' Windows: choco install pandoc');
|
| 229 |
+
process.exit(1);
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
const contentDir = resolve(cwd, 'src/content');
|
| 233 |
+
const articleFile = resolve(contentDir, 'article.mdx');
|
| 234 |
+
|
| 235 |
+
// Check if article.mdx exists
|
| 236 |
+
try {
|
| 237 |
+
await fs.access(articleFile);
|
| 238 |
+
} catch {
|
| 239 |
+
console.error(`❌ Could not find article.mdx at ${articleFile}`);
|
| 240 |
+
process.exit(1);
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
console.log('> Reading article content...');
|
| 244 |
+
const articleContent = await readMdxFile(articleFile);
|
| 245 |
+
const { frontmatter, content } = extractFrontmatter(articleContent);
|
| 246 |
+
|
| 247 |
+
console.log('> Processing chapters...');
|
| 248 |
+
const processedContent = await processChapterImports(content, contentDir);
|
| 249 |
+
|
| 250 |
+
console.log('> Converting MDX to Markdown...');
|
| 251 |
+
const markdownContent = cleanMdxToMarkdown(processedContent);
|
| 252 |
+
|
| 253 |
+
// Generate output filename
|
| 254 |
+
const title = frontmatter.title ? frontmatter.title.replace(/\n/g, ' ') : 'article';
|
| 255 |
+
const outFileBase = args.filename ? String(args.filename).replace(/\.(tex|pdf)$/i, '') : slugify(title);
|
| 256 |
+
|
| 257 |
+
// Create temporary markdown file
|
| 258 |
+
const tempMdFile = resolve(cwd, 'temp-article.md');
|
| 259 |
+
await fs.writeFile(tempMdFile, markdownContent);
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
console.log('> Converting to LaTeX with Pandoc...');
|
| 263 |
+
const outputLatex = resolve(cwd, 'dist', `${outFileBase}.tex`);
|
| 264 |
+
|
| 265 |
+
// Ensure dist directory exists
|
| 266 |
+
await fs.mkdir(resolve(cwd, 'dist'), { recursive: true });
|
| 267 |
+
|
| 268 |
+
// Pandoc conversion arguments
|
| 269 |
+
const pandocArgs = [
|
| 270 |
+
tempMdFile,
|
| 271 |
+
'-o', outputLatex,
|
| 272 |
+
'--from=markdown',
|
| 273 |
+
'--to=latex',
|
| 274 |
+
'--standalone',
|
| 275 |
+
'--toc',
|
| 276 |
+
'--number-sections',
|
| 277 |
+
'--highlight-style=tango',
|
| 278 |
+
'--listings'
|
| 279 |
+
];
|
| 280 |
+
|
| 281 |
+
// Add bibliography if it exists
|
| 282 |
+
const bibFile = resolve(contentDir, 'bibliography.bib');
|
| 283 |
+
try {
|
| 284 |
+
await fs.access(bibFile);
|
| 285 |
+
pandocArgs.push('--bibliography', bibFile);
|
| 286 |
+
pandocArgs.push('--citeproc');
|
| 287 |
+
console.log('✅ Found bibliography file, including citations');
|
| 288 |
+
} catch {
|
| 289 |
+
console.log('ℹ️ No bibliography file found');
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
try {
|
| 293 |
+
await run('pandoc', pandocArgs);
|
| 294 |
+
console.log(`✅ LaTeX generated: ${outputLatex}`);
|
| 295 |
+
|
| 296 |
+
// Optionally compile to PDF if requested
|
| 297 |
+
if (args.pdf) {
|
| 298 |
+
console.log('> Compiling LaTeX to PDF...');
|
| 299 |
+
const outputPdf = resolve(cwd, 'dist', `${outFileBase}.pdf`);
|
| 300 |
+
await run('pdflatex', ['-output-directory', resolve(cwd, 'dist'), outputLatex]);
|
| 301 |
+
console.log(`✅ PDF generated: ${outputPdf}`);
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
} catch (error) {
|
| 305 |
+
console.error('❌ Pandoc conversion failed:', error.message);
|
| 306 |
+
process.exit(1);
|
| 307 |
+
} finally {
|
| 308 |
+
// Clean up temporary file
|
| 309 |
+
try {
|
| 310 |
+
await fs.unlink(tempMdFile);
|
| 311 |
+
} catch { }
|
| 312 |
+
}
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
main().catch((err) => {
|
| 316 |
+
console.error(err);
|
| 317 |
+
process.exit(1);
|
| 318 |
+
});
|
app/scripts/export-pdf.mjs
ADDED
@@ -0,0 +1,483 @@
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
import { spawn } from 'node:child_process';
|
| 3 |
+
import { setTimeout as delay } from 'node:timers/promises';
|
| 4 |
+
import { chromium } from 'playwright';
|
| 5 |
+
import { resolve } from 'node:path';
|
| 6 |
+
import { promises as fs } from 'node:fs';
|
| 7 |
+
import process from 'node:process';
|
| 8 |
+
|
| 9 |
+
async function run(command, args = [], options = {}) {
|
| 10 |
+
return new Promise((resolvePromise, reject) => {
|
| 11 |
+
const child = spawn(command, args, { stdio: 'inherit', shell: false, ...options });
|
| 12 |
+
child.on('error', reject);
|
| 13 |
+
child.on('exit', (code) => {
|
| 14 |
+
if (code === 0) resolvePromise(undefined);
|
| 15 |
+
else reject(new Error(`${command} ${args.join(' ')} exited with code ${code}`));
|
| 16 |
+
});
|
| 17 |
+
});
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
async function waitForServer(url, timeoutMs = 60000) {
|
| 21 |
+
const start = Date.now();
|
| 22 |
+
while (Date.now() - start < timeoutMs) {
|
| 23 |
+
try {
|
| 24 |
+
const res = await fetch(url);
|
| 25 |
+
if (res.ok) return;
|
| 26 |
+
} catch {}
|
| 27 |
+
await delay(500);
|
| 28 |
+
}
|
| 29 |
+
throw new Error(`Server did not start in time: ${url}`);
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
function parseArgs(argv) {
|
| 33 |
+
const out = {};
|
| 34 |
+
for (const arg of argv.slice(2)) {
|
| 35 |
+
if (!arg.startsWith('--')) continue;
|
| 36 |
+
const [k, v] = arg.replace(/^--/, '').split('=');
|
| 37 |
+
out[k] = v === undefined ? true : v;
|
| 38 |
+
}
|
| 39 |
+
return out;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
function slugify(text) {
|
| 43 |
+
return String(text || '')
|
| 44 |
+
.normalize('NFKD')
|
| 45 |
+
.replace(/\p{Diacritic}+/gu, '')
|
| 46 |
+
.toLowerCase()
|
| 47 |
+
.replace(/[^a-z0-9]+/g, '-')
|
| 48 |
+
.replace(/^-+|-+$/g, '')
|
| 49 |
+
.slice(0, 120) || 'article';
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
function parseMargin(margin) {
|
| 53 |
+
if (!margin) return { top: '12mm', right: '12mm', bottom: '16mm', left: '12mm' };
|
| 54 |
+
const parts = String(margin).split(',').map(s => s.trim()).filter(Boolean);
|
| 55 |
+
if (parts.length === 1) {
|
| 56 |
+
return { top: parts[0], right: parts[0], bottom: parts[0], left: parts[0] };
|
| 57 |
+
}
|
| 58 |
+
if (parts.length === 2) {
|
| 59 |
+
return { top: parts[0], right: parts[1], bottom: parts[0], left: parts[1] };
|
| 60 |
+
}
|
| 61 |
+
if (parts.length === 3) {
|
| 62 |
+
return { top: parts[0], right: parts[1], bottom: parts[2], left: parts[1] };
|
| 63 |
+
}
|
| 64 |
+
return { top: parts[0] || '12mm', right: parts[1] || '12mm', bottom: parts[2] || '16mm', left: parts[3] || '12mm' };
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
function cssLengthToMm(val) {
|
| 68 |
+
if (!val) return 0;
|
| 69 |
+
const s = String(val).trim();
|
| 70 |
+
if (/mm$/i.test(s)) return parseFloat(s);
|
| 71 |
+
if (/cm$/i.test(s)) return parseFloat(s) * 10;
|
| 72 |
+
if (/in$/i.test(s)) return parseFloat(s) * 25.4;
|
| 73 |
+
if (/px$/i.test(s)) return (parseFloat(s) / 96) * 25.4; // 96 CSS px per inch
|
| 74 |
+
const num = parseFloat(s);
|
| 75 |
+
return Number.isFinite(num) ? num : 0; // assume mm if unitless
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
function getFormatSizeMm(format) {
|
| 79 |
+
const f = String(format || 'A4').toLowerCase();
|
| 80 |
+
switch (f) {
|
| 81 |
+
case 'letter': return { w: 215.9, h: 279.4 };
|
| 82 |
+
case 'legal': return { w: 215.9, h: 355.6 };
|
| 83 |
+
case 'a3': return { w: 297, h: 420 };
|
| 84 |
+
case 'tabloid': return { w: 279.4, h: 431.8 };
|
| 85 |
+
case 'a4':
|
| 86 |
+
default: return { w: 210, h: 297 };
|
| 87 |
+
}
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
async function waitForImages(page, timeoutMs = 15000) {
|
| 91 |
+
await page.evaluate(async (timeout) => {
|
| 92 |
+
const deadline = Date.now() + timeout;
|
| 93 |
+
const imgs = Array.from(document.images || []);
|
| 94 |
+
const unloaded = imgs.filter(img => !img.complete || (img.naturalWidth === 0));
|
| 95 |
+
await Promise.race([
|
| 96 |
+
Promise.all(unloaded.map(img => new Promise(res => {
|
| 97 |
+
if (img.complete && img.naturalWidth !== 0) return res(undefined);
|
| 98 |
+
img.addEventListener('load', () => res(undefined), { once: true });
|
| 99 |
+
img.addEventListener('error', () => res(undefined), { once: true });
|
| 100 |
+
}))),
|
| 101 |
+
new Promise(res => setTimeout(res, Math.max(0, deadline - Date.now())))
|
| 102 |
+
]);
|
| 103 |
+
}, timeoutMs);
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
async function waitForPlotly(page, timeoutMs = 20000) {
|
| 107 |
+
await page.evaluate(async (timeout) => {
|
| 108 |
+
const start = Date.now();
|
| 109 |
+
const hasPlots = () => Array.from(document.querySelectorAll('.js-plotly-plot')).length > 0;
|
| 110 |
+
// Wait until plots exist or timeout
|
| 111 |
+
while (!hasPlots() && (Date.now() - start) < timeout) {
|
| 112 |
+
await new Promise(r => setTimeout(r, 200));
|
| 113 |
+
}
|
| 114 |
+
const deadline = start + timeout;
|
| 115 |
+
// Then wait until each plot contains the main svg
|
| 116 |
+
const allReady = () => Array.from(document.querySelectorAll('.js-plotly-plot')).every(el => el.querySelector('svg.main-svg'));
|
| 117 |
+
while (!allReady() && Date.now() < deadline) {
|
| 118 |
+
await new Promise(r => setTimeout(r, 200));
|
| 119 |
+
}
|
| 120 |
+
}, timeoutMs);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
async function waitForD3(page, timeoutMs = 20000) {
|
| 124 |
+
await page.evaluate(async (timeout) => {
|
| 125 |
+
const start = Date.now();
|
| 126 |
+
const isReady = () => {
|
| 127 |
+
// Prioritize hero banner if present (generic container)
|
| 128 |
+
const hero = document.querySelector('.hero-banner');
|
| 129 |
+
if (hero) {
|
| 130 |
+
return !!hero.querySelector('svg circle, svg path, svg rect, svg g');
|
| 131 |
+
}
|
| 132 |
+
// Else require all D3 containers on page to have shapes
|
| 133 |
+
const containers = [
|
| 134 |
+
...Array.from(document.querySelectorAll('.d3-line')),
|
| 135 |
+
...Array.from(document.querySelectorAll('.d3-bar'))
|
| 136 |
+
];
|
| 137 |
+
if (!containers.length) return true;
|
| 138 |
+
return containers.every(c => c.querySelector('svg circle, svg path, svg rect, svg g'));
|
| 139 |
+
};
|
| 140 |
+
while (!isReady() && (Date.now() - start) < timeout) {
|
| 141 |
+
await new Promise(r => setTimeout(r, 200));
|
| 142 |
+
}
|
| 143 |
+
}, timeoutMs);
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
async function waitForStableLayout(page, timeoutMs = 5000) {
|
| 147 |
+
const start = Date.now();
|
| 148 |
+
let last = await page.evaluate(() => document.scrollingElement ? document.scrollingElement.scrollHeight : document.body.scrollHeight);
|
| 149 |
+
let stableCount = 0;
|
| 150 |
+
while ((Date.now() - start) < timeoutMs && stableCount < 3) {
|
| 151 |
+
await page.waitForTimeout(250);
|
| 152 |
+
const now = await page.evaluate(() => document.scrollingElement ? document.scrollingElement.scrollHeight : document.body.scrollHeight);
|
| 153 |
+
if (now === last) stableCount += 1; else { stableCount = 0; last = now; }
|
| 154 |
+
}
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
async function main() {
|
| 158 |
+
const cwd = process.cwd();
|
| 159 |
+
const port = Number(process.env.PREVIEW_PORT || 8080);
|
| 160 |
+
const baseUrl = `http://127.0.0.1:${port}/`;
|
| 161 |
+
const args = parseArgs(process.argv);
|
| 162 |
+
// Default: light (do not rely on env vars implicitly)
|
| 163 |
+
const theme = (args.theme === 'dark' || args.theme === 'light') ? args.theme : 'light';
|
| 164 |
+
const format = args.format || 'A4';
|
| 165 |
+
const margin = parseMargin(args.margin);
|
| 166 |
+
const wait = (args.wait || 'full'); // 'networkidle' | 'images' | 'plotly' | 'full'
|
| 167 |
+
|
| 168 |
+
// filename can be provided, else computed from DOM (button) or page title later
|
| 169 |
+
let outFileBase = (args.filename && String(args.filename).replace(/\.pdf$/i, '')) || 'article';
|
| 170 |
+
|
| 171 |
+
// Build only if dist/ does not exist
|
| 172 |
+
const distDir = resolve(cwd, 'dist');
|
| 173 |
+
let hasDist = false;
|
| 174 |
+
try {
|
| 175 |
+
const st = await fs.stat(distDir);
|
| 176 |
+
hasDist = st && st.isDirectory();
|
| 177 |
+
} catch {}
|
| 178 |
+
if (!hasDist) {
|
| 179 |
+
console.log('> Building Astro site…');
|
| 180 |
+
await run('npm', ['run', 'build']);
|
| 181 |
+
} else {
|
| 182 |
+
console.log('> Skipping build (dist/ exists)…');
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
console.log('> Starting Astro preview…');
|
| 186 |
+
// Start preview in its own process group so we can terminate all children reliably
|
| 187 |
+
const preview = spawn('npm', ['run', 'preview'], { cwd, stdio: 'inherit', detached: true });
|
| 188 |
+
const previewExit = new Promise((resolvePreview) => {
|
| 189 |
+
preview.on('close', (code, signal) => resolvePreview({ code, signal }));
|
| 190 |
+
});
|
| 191 |
+
|
| 192 |
+
try {
|
| 193 |
+
await waitForServer(baseUrl, 60000);
|
| 194 |
+
console.log('> Server ready, generating PDF…');
|
| 195 |
+
|
| 196 |
+
const browser = await chromium.launch({ headless: true });
|
| 197 |
+
try {
|
| 198 |
+
const context = await browser.newContext();
|
| 199 |
+
await context.addInitScript((desired) => {
|
| 200 |
+
try {
|
| 201 |
+
localStorage.setItem('theme', desired);
|
| 202 |
+
// Apply theme immediately to avoid flashes
|
| 203 |
+
if (document && document.documentElement) {
|
| 204 |
+
document.documentElement.dataset.theme = desired;
|
| 205 |
+
}
|
| 206 |
+
} catch {}
|
| 207 |
+
}, theme);
|
| 208 |
+
const page = await context.newPage();
|
| 209 |
+
// Pre-fit viewport width to printable width so charts size correctly
|
| 210 |
+
const fmt = getFormatSizeMm(format);
|
| 211 |
+
const mw = fmt.w - cssLengthToMm(margin.left) - cssLengthToMm(margin.right);
|
| 212 |
+
const printableWidthPx = Math.max(320, Math.round((mw / 25.4) * 96));
|
| 213 |
+
await page.setViewportSize({ width: printableWidthPx, height: 1200 });
|
| 214 |
+
await page.goto(baseUrl, { waitUntil: 'load', timeout: 60000 });
|
| 215 |
+
// Give time for CDN scripts (Plotly/D3) to attach and for our fragment hooks to run
|
| 216 |
+
try { await page.waitForFunction(() => !!window.Plotly, { timeout: 8000 }); } catch {}
|
| 217 |
+
try { await page.waitForFunction(() => !!window.d3, { timeout: 8000 }); } catch {}
|
| 218 |
+
// Prefer explicit filename from the download button if present
|
| 219 |
+
if (!args.filename) {
|
| 220 |
+
const fromBtn = await page.evaluate(() => {
|
| 221 |
+
const btn = document.getElementById('download-pdf-btn');
|
| 222 |
+
const f = btn ? btn.getAttribute('data-pdf-filename') : null;
|
| 223 |
+
return f || '';
|
| 224 |
+
});
|
| 225 |
+
if (fromBtn) {
|
| 226 |
+
outFileBase = String(fromBtn).replace(/\.pdf$/i, '');
|
| 227 |
+
} else {
|
| 228 |
+
// Fallback: compute slug from hero title or document.title
|
| 229 |
+
const title = await page.evaluate(() => {
|
| 230 |
+
const h1 = document.querySelector('h1.hero-title');
|
| 231 |
+
const t = h1 ? h1.textContent : document.title;
|
| 232 |
+
return (t || '').replace(/\s+/g, ' ').trim();
|
| 233 |
+
});
|
| 234 |
+
outFileBase = slugify(title);
|
| 235 |
+
}
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
// Wait for render readiness
|
| 239 |
+
if (wait === 'images' || wait === 'full') {
|
| 240 |
+
await waitForImages(page);
|
| 241 |
+
}
|
| 242 |
+
if (wait === 'd3' || wait === 'full') {
|
| 243 |
+
await waitForD3(page);
|
| 244 |
+
}
|
| 245 |
+
if (wait === 'plotly' || wait === 'full') {
|
| 246 |
+
await waitForPlotly(page);
|
| 247 |
+
}
|
| 248 |
+
if (wait === 'full') {
|
| 249 |
+
await waitForStableLayout(page);
|
| 250 |
+
}
|
| 251 |
+
await page.emulateMedia({ media: 'print' });
|
| 252 |
+
|
| 253 |
+
// Enforce responsive sizing for SVG/iframes by removing hard attrs and injecting CSS (top-level and inside same-origin iframes)
|
| 254 |
+
try {
|
| 255 |
+
await page.evaluate(() => {
|
| 256 |
+
function isSmallSvg(svg){
|
| 257 |
+
try {
|
| 258 |
+
const vb = svg && svg.viewBox && svg.viewBox.baseVal ? svg.viewBox.baseVal : null;
|
| 259 |
+
if (vb && vb.width && vb.height && vb.width <= 50 && vb.height <= 50) return true;
|
| 260 |
+
const r = svg.getBoundingClientRect && svg.getBoundingClientRect();
|
| 261 |
+
if (r && r.width && r.height && r.width <= 50 && r.height <= 50) return true;
|
| 262 |
+
} catch {}
|
| 263 |
+
return false;
|
| 264 |
+
}
|
| 265 |
+
function lockSmallSvgSize(svg){
|
| 266 |
+
try {
|
| 267 |
+
const r = svg.getBoundingClientRect ? svg.getBoundingClientRect() : null;
|
| 268 |
+
const w = (r && r.width) ? Math.round(r.width) : null;
|
| 269 |
+
const h = (r && r.height) ? Math.round(r.height) : null;
|
| 270 |
+
if (w) svg.style.setProperty('width', w + 'px', 'important');
|
| 271 |
+
if (h) svg.style.setProperty('height', h + 'px', 'important');
|
| 272 |
+
svg.style.setProperty('max-width', 'none', 'important');
|
| 273 |
+
} catch {}
|
| 274 |
+
}
|
| 275 |
+
function fixSvg(svg){
|
| 276 |
+
if (!svg) return;
|
| 277 |
+
// Do not alter hero banner SVG sizing; it may rely on explicit width/height
|
| 278 |
+
try { if (svg.closest && svg.closest('.hero-banner')) return; } catch {}
|
| 279 |
+
if (isSmallSvg(svg)) { lockSmallSvgSize(svg); return; }
|
| 280 |
+
try { svg.removeAttribute('width'); } catch {}
|
| 281 |
+
try { svg.removeAttribute('height'); } catch {}
|
| 282 |
+
svg.style.maxWidth = '100%';
|
| 283 |
+
svg.style.width = '100%';
|
| 284 |
+
svg.style.height = 'auto';
|
| 285 |
+
if (!svg.getAttribute('preserveAspectRatio')) svg.setAttribute('preserveAspectRatio','xMidYMid meet');
|
| 286 |
+
}
|
| 287 |
+
document.querySelectorAll('svg').forEach(fixSvg);
|
| 288 |
+
document.querySelectorAll('.mermaid, .mermaid svg').forEach((el)=>{
|
| 289 |
+
if (el.tagName && el.tagName.toLowerCase() === 'svg') fixSvg(el);
|
| 290 |
+
else { el.style.display='block'; el.style.width='100%'; el.style.maxWidth='100%'; }
|
| 291 |
+
});
|
| 292 |
+
document.querySelectorAll('iframe, embed, object').forEach((el) => {
|
| 293 |
+
el.style.width = '100%';
|
| 294 |
+
el.style.maxWidth = '100%';
|
| 295 |
+
try { el.removeAttribute('width'); } catch {}
|
| 296 |
+
// Best-effort inject into same-origin frames
|
| 297 |
+
try {
|
| 298 |
+
const doc = el.contentDocument; // null for cross-origin frames
|
| 299 |
+
if (doc && doc.head) {
|
| 300 |
+
const s = doc.createElement('style');
|
| 301 |
+
s.textContent = 'html,body{overflow-x:hidden;} svg,canvas,img,video{max-width:100%!important;height:auto!important;} svg[width]{width:100%!important}';
|
| 302 |
+
doc.head.appendChild(s);
|
| 303 |
+
doc.querySelectorAll('svg').forEach((svg)=>{ if (isSmallSvg(svg)) lockSmallSvgSize(svg); else fixSvg(svg); });
|
| 304 |
+
}
|
| 305 |
+
} catch (_) { /* cross-origin; ignore */ }
|
| 306 |
+
});
|
| 307 |
+
});
|
| 308 |
+
} catch {}
|
| 309 |
+
|
| 310 |
+
// Generate OG thumbnail (1200x630)
|
| 311 |
+
try {
|
| 312 |
+
const ogW = 1200, ogH = 630;
|
| 313 |
+
await page.setViewportSize({ width: ogW, height: ogH });
|
| 314 |
+
// Give layout a tick to adjust
|
| 315 |
+
await page.waitForTimeout(200);
|
| 316 |
+
// Ensure layout & D3 re-rendered after viewport change
|
| 317 |
+
await page.evaluate(() => { window.scrollTo(0, 0); window.dispatchEvent(new Event('resize')); });
|
| 318 |
+
try { await waitForD3(page, 8000); } catch {}
|
| 319 |
+
|
| 320 |
+
// Temporarily improve visibility for light theme thumbnails
|
| 321 |
+
// - Force normal blend for points
|
| 322 |
+
// - Ensure an SVG background (CSS background on svg element)
|
| 323 |
+
const cssHandle = await page.addStyleTag({ content: `
|
| 324 |
+
.hero .points { mix-blend-mode: normal !important; }
|
| 325 |
+
` });
|
| 326 |
+
const thumbPath = resolve(cwd, 'dist', 'thumb.auto.jpg');
|
| 327 |
+
await page.screenshot({ path: thumbPath, type: 'jpeg', quality: 85, fullPage: false });
|
| 328 |
+
// Also emit PNG for compatibility if needed
|
| 329 |
+
const thumbPngPath = resolve(cwd, 'dist', 'thumb.auto.png');
|
| 330 |
+
await page.screenshot({ path: thumbPngPath, type: 'png', fullPage: false });
|
| 331 |
+
const publicThumb = resolve(cwd, 'public', 'thumb.auto.jpg');
|
| 332 |
+
const publicThumbPng = resolve(cwd, 'public', 'thumb.auto.png');
|
| 333 |
+
try { await fs.copyFile(thumbPath, publicThumb); } catch {}
|
| 334 |
+
try { await fs.copyFile(thumbPngPath, publicThumbPng); } catch {}
|
| 335 |
+
// Remove temporary style so PDF is unaffected
|
| 336 |
+
try { await cssHandle.evaluate((el) => el.remove()); } catch {}
|
| 337 |
+
console.log(`✅ OG thumbnail generated: ${thumbPath}`);
|
| 338 |
+
} catch (e) {
|
| 339 |
+
console.warn('Unable to generate OG thumbnail:', e?.message || e);
|
| 340 |
+
}
|
| 341 |
+
const outPath = resolve(cwd, 'dist', `${outFileBase}.pdf`);
|
| 342 |
+
// Restore viewport to printable width before PDF (thumbnail changed it)
|
| 343 |
+
try {
|
| 344 |
+
const fmt2 = getFormatSizeMm(format);
|
| 345 |
+
const mw2 = fmt2.w - cssLengthToMm(margin.left) - cssLengthToMm(margin.right);
|
| 346 |
+
const printableWidthPx2 = Math.max(320, Math.round((mw2 / 25.4) * 96));
|
| 347 |
+
await page.setViewportSize({ width: printableWidthPx2, height: 1400 });
|
| 348 |
+
await page.evaluate(() => { window.scrollTo(0, 0); window.dispatchEvent(new Event('resize')); });
|
| 349 |
+
try { await waitForD3(page, 8000); } catch {}
|
| 350 |
+
await waitForStableLayout(page);
|
| 351 |
+
// Re-apply responsive fixes after viewport change
|
| 352 |
+
try {
|
| 353 |
+
await page.evaluate(() => {
|
| 354 |
+
function isSmallSvg(svg){
|
| 355 |
+
try {
|
| 356 |
+
const vb = svg && svg.viewBox && svg.viewBox.baseVal ? svg.viewBox.baseVal : null;
|
| 357 |
+
if (vb && vb.width && vb.height && vb.width <= 50 && vb.height <= 50) return true;
|
| 358 |
+
const r = svg.getBoundingClientRect && svg.getBoundingClientRect();
|
| 359 |
+
if (r && r.width && r.height && r.width <= 50 && r.height <= 50) return true;
|
| 360 |
+
} catch {}
|
| 361 |
+
return false;
|
| 362 |
+
}
|
| 363 |
+
function lockSmallSvgSize(svg){
|
| 364 |
+
try {
|
| 365 |
+
const r = svg.getBoundingClientRect ? svg.getBoundingClientRect() : null;
|
| 366 |
+
const w = (r && r.width) ? Math.round(r.width) : null;
|
| 367 |
+
const h = (r && r.height) ? Math.round(r.height) : null;
|
| 368 |
+
if (w) svg.style.setProperty('width', w + 'px', 'important');
|
| 369 |
+
if (h) svg.style.setProperty('height', h + 'px', 'important');
|
| 370 |
+
svg.style.setProperty('max-width', 'none', 'important');
|
| 371 |
+
} catch {}
|
| 372 |
+
}
|
| 373 |
+
function fixSvg(svg){
|
| 374 |
+
if (!svg) return;
|
| 375 |
+
// Do not alter hero banner SVG sizing; it may rely on explicit width/height
|
| 376 |
+
try { if (svg.closest && svg.closest('.hero-banner')) return; } catch {}
|
| 377 |
+
if (isSmallSvg(svg)) { lockSmallSvgSize(svg); return; }
|
| 378 |
+
try { svg.removeAttribute('width'); } catch {}
|
| 379 |
+
try { svg.removeAttribute('height'); } catch {}
|
| 380 |
+
svg.style.maxWidth = '100%';
|
| 381 |
+
svg.style.width = '100%';
|
| 382 |
+
svg.style.height = 'auto';
|
| 383 |
+
if (!svg.getAttribute('preserveAspectRatio')) svg.setAttribute('preserveAspectRatio','xMidYMid meet');
|
| 384 |
+
}
|
| 385 |
+
document.querySelectorAll('svg').forEach((svg)=>{ if (isSmallSvg(svg)) lockSmallSvgSize(svg); else fixSvg(svg); });
|
| 386 |
+
document.querySelectorAll('.mermaid, .mermaid svg').forEach((el)=>{
|
| 387 |
+
if (el.tagName && el.tagName.toLowerCase() === 'svg') fixSvg(el);
|
| 388 |
+
else { el.style.display='block'; el.style.width='100%'; el.style.maxWidth='100%'; }
|
| 389 |
+
});
|
| 390 |
+
document.querySelectorAll('iframe, embed, object').forEach((el) => {
|
| 391 |
+
el.style.width = '100%';
|
| 392 |
+
el.style.maxWidth = '100%';
|
| 393 |
+
try { el.removeAttribute('width'); } catch {}
|
| 394 |
+
try {
|
| 395 |
+
const doc = el.contentDocument; // null for cross-origin frames
|
| 396 |
+
if (doc && doc.head) {
|
| 397 |
+
const s = doc.createElement('style');
|
| 398 |
+
s.textContent = 'html,body{overflow-x:hidden;} svg,canvas,img,video{max-width:100%!important;height:auto!important;} svg[width]{width:100%!important}';
|
| 399 |
+
doc.head.appendChild(s);
|
| 400 |
+
doc.querySelectorAll('svg').forEach((svg)=>{ if (isSmallSvg(svg)) lockSmallSvgSize(svg); else fixSvg(svg); });
|
| 401 |
+
}
|
| 402 |
+
} catch (_) {}
|
| 403 |
+
});
|
| 404 |
+
});
|
| 405 |
+
} catch {}
|
| 406 |
+
} catch {}
|
| 407 |
+
// Temporarily enforce print-safe responsive sizing (SVG/iframes) and improve banner visibility
|
| 408 |
+
let pdfCssHandle = null;
|
| 409 |
+
try {
|
| 410 |
+
pdfCssHandle = await page.addStyleTag({ content: `
|
| 411 |
+
/* General container safety */
|
| 412 |
+
html, body { overflow-x: hidden !important; }
|
| 413 |
+
|
| 414 |
+
/* Make all vector/bitmap media responsive for print */
|
| 415 |
+
svg, canvas, img, video { max-width: 100% !important; height: auto !important; }
|
| 416 |
+
/* Mermaid diagrams */
|
| 417 |
+
.mermaid, .mermaid svg { display: block; width: 100% !important; max-width: 100% !important; height: auto !important; }
|
| 418 |
+
/* Any explicit width attributes */
|
| 419 |
+
svg[width] { width: 100% !important; }
|
| 420 |
+
/* Iframes and similar embeds */
|
| 421 |
+
iframe, embed, object { width: 100% !important; max-width: 100% !important; height: auto; }
|
| 422 |
+
|
| 423 |
+
/* HtmlEmbed wrappers (defensive) */
|
| 424 |
+
.html-embed, .html-embed__card { max-width: 100% !important; width: 100% !important; }
|
| 425 |
+
.html-embed__card > div[id^="frag-"] { width: 100% !important; max-width: 100% !important; }
|
| 426 |
+
|
| 427 |
+
/* Banner centering & visibility */
|
| 428 |
+
.hero .points { mix-blend-mode: normal !important; }
|
| 429 |
+
/* Do NOT force a fixed height to avoid clipping in PDF */
|
| 430 |
+
.hero-banner { width: 100% !important; max-width: 980px !important; margin-left: auto !important; margin-right: auto !important; }
|
| 431 |
+
.hero-banner svg { width: 100% !important; height: auto !important; }
|
| 432 |
+
` });
|
| 433 |
+
} catch {}
|
| 434 |
+
await page.pdf({
|
| 435 |
+
path: outPath,
|
| 436 |
+
format,
|
| 437 |
+
printBackground: true,
|
| 438 |
+
margin
|
| 439 |
+
});
|
| 440 |
+
try { if (pdfCssHandle) await pdfCssHandle.evaluate((el) => el.remove()); } catch {}
|
| 441 |
+
console.log(`✅ PDF generated: ${outPath}`);
|
| 442 |
+
|
| 443 |
+
// Copy into public only under the slugified name
|
| 444 |
+
const publicSlugPath = resolve(cwd, 'public', `${outFileBase}.pdf`);
|
| 445 |
+
try {
|
| 446 |
+
await fs.mkdir(resolve(cwd, 'public'), { recursive: true });
|
| 447 |
+
await fs.copyFile(outPath, publicSlugPath);
|
| 448 |
+
console.log(`✅ PDF copied to: ${publicSlugPath}`);
|
| 449 |
+
} catch (e) {
|
| 450 |
+
console.warn('Unable to copy PDF to public/:', e?.message || e);
|
| 451 |
+
}
|
| 452 |
+
} finally {
|
| 453 |
+
await browser.close();
|
| 454 |
+
}
|
| 455 |
+
} finally {
|
| 456 |
+
// Try a clean shutdown of preview (entire process group first)
|
| 457 |
+
try {
|
| 458 |
+
if (process.platform !== 'win32') {
|
| 459 |
+
try { process.kill(-preview.pid, 'SIGINT'); } catch {}
|
| 460 |
+
}
|
| 461 |
+
try { preview.kill('SIGINT'); } catch {}
|
| 462 |
+
await Promise.race([previewExit, delay(3000)]);
|
| 463 |
+
// Force kill if still alive
|
| 464 |
+
|
| 465 |
+
if (!preview.killed) {
|
| 466 |
+
try {
|
| 467 |
+
if (process.platform !== 'win32') {
|
| 468 |
+
try { process.kill(-preview.pid, 'SIGKILL'); } catch {}
|
| 469 |
+
}
|
| 470 |
+
try { preview.kill('SIGKILL'); } catch {}
|
| 471 |
+
} catch {}
|
| 472 |
+
await Promise.race([previewExit, delay(1000)]);
|
| 473 |
+
}
|
| 474 |
+
} catch {}
|
| 475 |
+
}
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
main().catch((err) => {
|
| 479 |
+
console.error(err);
|
| 480 |
+
process.exit(1);
|
| 481 |
+
});
|
| 482 |
+
|
| 483 |
+
|
app/scripts/generate-trackio-data.mjs
ADDED
|
@@ -0,0 +1,196 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
// Generate synthetic Trackio-like CSV data with realistic ML curves.
|
| 4 |
+
// - Steps are simple integers (e.g., 1..N)
|
| 5 |
+
// - Metrics: epoch, train_accuracy, val_accuracy, train_loss, val_loss
|
| 6 |
+
// - W&B-like run names (e.g., pleasant-flower-1)
|
| 7 |
+
// - Deterministic with --seed
|
| 8 |
+
//
|
| 9 |
+
// Usage:
|
| 10 |
+
// node app/scripts/generate-trackio-data.mjs \
|
| 11 |
+
// --runs 3 \
|
| 12 |
+
// --steps 10 \
|
| 13 |
+
// --out app/src/content/assets/data/trackio_wandb_synth.csv \
|
| 14 |
+
// [--seed 42] [--epoch-max 3.0] [--amount 1.0] [--start 1]
|
| 15 |
+
//
|
| 16 |
+
// To overwrite the demo file used by the embed:
|
| 17 |
+
// node app/scripts/generate-trackio-data.mjs --runs 3 --steps 10 --out app/src/content/assets/data/trackio_wandb_demo.csv --seed 1337
|
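// Illustrative output shape (values below are made up, not generated; rows are grouped
// by metric in the actual file):
//   run,step,metric,value,stderr
//   pleasant-flower-1,1,epoch,0.00,
//   pleasant-flower-1,1,train_loss,7.012,0.031
//   pleasant-flower-1,1,val_loss,7.048,0.036
//   pleasant-flower-1,1,train_accuracy,0.5612,0.021
//   pleasant-flower-1,1,val_accuracy,0.5423,0.023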
| 18 |
+
|
| 19 |
+
import fs from 'node:fs/promises';
|
| 20 |
+
import path from 'node:path';
|
| 21 |
+
|
| 22 |
+
function parseArgs(argv){
|
| 23 |
+
const args = { runs: 3, steps: 10, out: '', seed: undefined, epochMax: 3.0, amount: 1, start: 1 };
|
| 24 |
+
for (let i = 2; i < argv.length; i++){
|
| 25 |
+
const a = argv[i];
|
| 26 |
+
if (a === '--runs' && argv[i+1]) { args.runs = Math.max(1, parseInt(argv[++i], 10) || 3); continue; }
|
| 27 |
+
if (a === '--steps' && argv[i+1]) { args.steps = Math.max(2, parseInt(argv[++i], 10) || 10); continue; }
|
| 28 |
+
if (a === '--out' && argv[i+1]) { args.out = argv[++i]; continue; }
|
| 29 |
+
if (a === '--seed' && argv[i+1]) { args.seed = Number(argv[++i]); continue; }
|
| 30 |
+
if (a === '--epoch-max' && argv[i+1]) { args.epochMax = Number(argv[++i]) || 3.0; continue; }
|
| 31 |
+
if (a === '--amount' && argv[i+1]) { args.amount = Number(argv[++i]) || 1.0; continue; }
|
| 32 |
+
if (a === '--start' && argv[i+1]) { args.start = parseInt(argv[++i], 10) || 1; continue; }
|
| 33 |
+
}
|
| 34 |
+
if (!args.out) {
|
| 35 |
+
args.out = path.join('app', 'src', 'content', 'assets', 'data', 'trackio_wandb_synth.csv');
|
| 36 |
+
}
|
| 37 |
+
return args;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
function mulberry32(seed){
|
| 41 |
+
let t = seed >>> 0;
|
| 42 |
+
return function(){
|
| 43 |
+
t += 0x6D2B79F5;
|
| 44 |
+
let r = Math.imul(t ^ (t >>> 15), 1 | t);
|
| 45 |
+
r ^= r + Math.imul(r ^ (r >>> 7), 61 | r);
|
| 46 |
+
return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
|
| 47 |
+
};
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
function makeRng(seed){
|
| 51 |
+
if (Number.isFinite(seed)) return mulberry32(seed);
|
| 52 |
+
return Math.random;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
function randn(rng){
|
| 56 |
+
// Box-Muller transform
|
| 57 |
+
let u = 0, v = 0;
|
| 58 |
+
while (u === 0) u = rng();
|
| 59 |
+
while (v === 0) v = rng();
|
| 60 |
+
return Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
function clamp(x, lo, hi){
|
| 64 |
+
return Math.max(lo, Math.min(hi, x));
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
function logistic(t, k=6, x0=0.5){
|
| 68 |
+
// 1 / (1 + e^{-k (t - x0)}) in [0,1]
|
| 69 |
+
return 1 / (1 + Math.exp(-k * (t - x0)));
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
function expDecay(t, k=3){
|
| 73 |
+
// (1 - e^{-k t}) in [0,1]
|
| 74 |
+
return 1 - Math.exp(-k * t);
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
function pick(array, rng){
|
| 78 |
+
return array[Math.floor(rng() * array.length) % array.length];
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
function buildRunNames(count, rng){
|
| 82 |
+
const adjectives = [
|
| 83 |
+
'pleasant','brisk','silent','ancient','bold','gentle','rapid','shy','curious','lively',
|
| 84 |
+
'fearless','soothing','glossy','hidden','misty','bright','calm','keen','noble','swift'
|
| 85 |
+
];
|
| 86 |
+
const nouns = [
|
| 87 |
+
'flower','glade','sky','river','forest','ember','comet','meadow','harbor','dawn',
|
| 88 |
+
'mountain','prairie','breeze','valley','lagoon','desert','monsoon','reef','thunder','willow'
|
| 89 |
+
];
|
| 90 |
+
const names = new Set();
|
| 91 |
+
let attempts = 0;
|
| 92 |
+
while (names.size < count && attempts < count * 20){
|
| 93 |
+
attempts++;
|
| 94 |
+
const left = pick(adjectives, rng);
|
| 95 |
+
const right = pick(nouns, rng);
|
| 96 |
+
const idx = 1 + Math.floor(rng() * 9);
|
| 97 |
+
names.add(`${left}-${right}-${idx}`);
|
| 98 |
+
}
|
| 99 |
+
return Array.from(names);
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
function formatLike(value, decimals){
|
| 103 |
+
return Number.isFinite(decimals) && decimals >= 0 ? value.toFixed(decimals) : String(value);
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
async function main(){
|
| 107 |
+
const args = parseArgs(process.argv);
|
| 108 |
+
const rng = makeRng(args.seed);
|
| 109 |
+
|
| 110 |
+
// Steps: integers from start .. start+steps-1
|
| 111 |
+
const steps = Array.from({ length: args.steps }, (_, i) => args.start + i);
|
| 112 |
+
const stepNorm = (i) => (i - steps[0]) / (steps[steps.length-1] - steps[0]);
|
| 113 |
+
|
| 114 |
+
const runs = buildRunNames(args.runs, rng);
|
| 115 |
+
|
| 116 |
+
// Per-run slight variations
|
| 117 |
+
const runParams = runs.map((_r, idx) => {
|
| 118 |
+
const r = rng();
|
| 119 |
+
// Final accuracies
|
| 120 |
+
const trainAccFinal = clamp(0.86 + (r - 0.5) * 0.12 * args.amount, 0.78, 0.97);
|
| 121 |
+
const valAccFinal = clamp(trainAccFinal - (0.02 + rng() * 0.05), 0.70, 0.95);
|
| 122 |
+
// Loss plateau
|
| 123 |
+
const lossStart = 7.0 + (rng() - 0.5) * 0.10 * args.amount; // ~7.0 ±0.05
|
| 124 |
+
const lossPlateau = 6.78 + (rng() - 0.5) * 0.04 * args.amount; // ~6.78 ±0.02
|
| 125 |
+
const lossK = 2.0 + rng() * 1.5; // decay speed
|
| 126 |
+
// Acc growth steepness and midpoint
|
| 127 |
+
const kAcc = 4.5 + rng() * 3.0;
|
| 128 |
+
const x0Acc = 0.35 + rng() * 0.25;
|
| 129 |
+
return { trainAccFinal, valAccFinal, lossStart, lossPlateau, lossK, kAcc, x0Acc };
|
| 130 |
+
});
|
| 131 |
+
|
| 132 |
+
const lines = [];
|
| 133 |
+
lines.push('run,step,metric,value,stderr');
|
| 134 |
+
|
| 135 |
+
// EPOCH: linear 0..epochMax across steps
|
| 136 |
+
for (let r = 0; r < runs.length; r++){
|
| 137 |
+
const run = runs[r];
|
| 138 |
+
for (let i = 0; i < steps.length; i++){
|
| 139 |
+
const t = stepNorm(steps[i]);
|
| 140 |
+
const epoch = args.epochMax * t;
|
| 141 |
+
lines.push(`${run},${steps[i]},epoch,${formatLike(epoch, 2)},`);
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
// TRAIN LOSS & VAL LOSS
|
| 146 |
+
for (let r = 0; r < runs.length; r++){
|
| 147 |
+
const run = runs[r];
|
| 148 |
+
const p = runParams[r];
|
| 149 |
+
let prevTrain = null;
|
| 150 |
+
let prevVal = null;
|
| 151 |
+
for (let i = 0; i < steps.length; i++){
|
| 152 |
+
const t = stepNorm(steps[i]);
|
| 153 |
+
const d = expDecay(t, p.lossK); // 0..1
|
| 154 |
+
let trainLoss = p.lossStart - (p.lossStart - p.lossPlateau) * d;
|
| 155 |
+
let valLoss = trainLoss + 0.02 + (rng() * 0.03);
|
| 156 |
+
// Add mild noise
|
| 157 |
+
trainLoss += randn(rng) * 0.01 * args.amount;
|
| 158 |
+
valLoss += randn(rng) * 0.012 * args.amount;
|
| 159 |
+
// Keep reasonable and mostly monotonic (small upward blips allowed)
|
| 160 |
+
if (prevTrain != null) trainLoss = Math.min(prevTrain + 0.01, trainLoss);
|
| 161 |
+
if (prevVal != null) valLoss = Math.min(prevVal + 0.012, valLoss);
|
| 162 |
+
prevTrain = trainLoss; prevVal = valLoss;
|
| 163 |
+
const stderrTrain = clamp(0.03 - 0.02 * t + Math.abs(randn(rng)) * 0.003, 0.006, 0.04);
|
| 164 |
+
const stderrVal = clamp(0.035 - 0.022 * t + Math.abs(randn(rng)) * 0.003, 0.008, 0.045);
|
| 165 |
+
lines.push(`${run},${steps[i]},train_loss,${formatLike(trainLoss, 3)},${formatLike(stderrTrain, 3)}`);
|
| 166 |
+
lines.push(`${run},${steps[i]},val_loss,${formatLike(valLoss, 3)},${formatLike(stderrVal, 3)}`);
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
// TRAIN ACCURACY & VAL ACCURACY (logistic)
|
| 171 |
+
for (let r = 0; r < runs.length; r++){
|
| 172 |
+
const run = runs[r];
|
| 173 |
+
const p = runParams[r];
|
| 174 |
+
for (let i = 0; i < steps.length; i++){
|
| 175 |
+
const t = stepNorm(steps[i]);
|
| 176 |
+
const accBase = logistic(t, p.kAcc, p.x0Acc);
|
| 177 |
+
let trainAcc = clamp(0.55 + accBase * (p.trainAccFinal - 0.55), 0, 1);
|
| 178 |
+
let valAcc = clamp(0.52 + accBase * (p.valAccFinal - 0.52), 0, 1);
|
| 179 |
+
// Gentle noise
|
| 180 |
+
trainAcc = clamp(trainAcc + randn(rng) * 0.005 * args.amount, 0, 1);
|
| 181 |
+
valAcc = clamp(valAcc + randn(rng) * 0.006 * args.amount, 0, 1);
|
| 182 |
+
const stderrTrain = clamp(0.02 - 0.011 * t + Math.abs(randn(rng)) * 0.002, 0.006, 0.03);
|
| 183 |
+
const stderrVal = clamp(0.022 - 0.012 * t + Math.abs(randn(rng)) * 0.002, 0.007, 0.032);
|
| 184 |
+
lines.push(`${run},${steps[i]},train_accuracy,${formatLike(trainAcc, 4)},${formatLike(stderrTrain, 3)}`);
|
| 185 |
+
lines.push(`${run},${steps[i]},val_accuracy,${formatLike(valAcc, 4)},${formatLike(stderrVal, 3)}`);
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
// Ensure directory exists
|
| 190 |
+
await fs.mkdir(path.dirname(args.out), { recursive: true });
|
| 191 |
+
await fs.writeFile(args.out, lines.join('\n') + '\n', 'utf8');
|
| 192 |
+
const relOut = path.relative(process.cwd(), args.out);
|
| 193 |
+
console.log(`Synthetic CSV generated: ${relOut}`);
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
main().catch(err => { console.error(err?.stack || String(err)); process.exit(1); });
|
app/scripts/jitter-trackio-data.mjs
ADDED
|
@@ -0,0 +1,129 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
// Jitter Trackio CSV data with small, controlled noise.
|
| 4 |
+
// - Preserves comments (# ...) and blank lines
|
| 5 |
+
// - Leaves 'epoch' values unchanged
|
| 6 |
+
// - Adds mild noise to train/val accuracy (clamped to [0,1])
|
| 7 |
+
// - Adds mild noise to train/val loss (kept >= 0)
|
| 8 |
+
// - Keeps steps untouched
|
| 9 |
+
// Usage:
|
| 10 |
+
// node app/scripts/jitter-trackio-data.mjs \
|
| 11 |
+
// --in app/src/content/assets/data/trackio_wandb_demo.csv \
|
| 12 |
+
// --out app/src/content/assets/data/trackio_wandb_demo.jitter.csv \
|
| 13 |
+
// [--seed 42] [--amount 1.0] [--in-place]
|
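// Illustrative effect (made-up row) with --amount 1:
//   pleasant-flower-1,3,val_accuracy,0.7421,0.015  -->  pleasant-flower-1,3,val_accuracy,0.7389,0.015
// Accuracy moves by at most ±0.02 and loss by at most ±0.03 per row; epoch rows, the stderr
// column, and the original number of decimals are all preserved.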
| 14 |
+
|
| 15 |
+
import fs from 'node:fs/promises';
|
| 16 |
+
import path from 'node:path';
|
| 17 |
+
|
| 18 |
+
function parseArgs(argv){
|
| 19 |
+
const args = { in: '', out: '', seed: undefined, amount: 1, inPlace: false };
|
| 20 |
+
for (let i = 2; i < argv.length; i++){
|
| 21 |
+
const a = argv[i];
|
| 22 |
+
if (a === '--in' && argv[i+1]) { args.in = argv[++i]; continue; }
|
| 23 |
+
if (a === '--out' && argv[i+1]) { args.out = argv[++i]; continue; }
|
| 24 |
+
if (a === '--seed' && argv[i+1]) { args.seed = Number(argv[++i]); continue; }
|
| 25 |
+
if (a === '--amount' && argv[i+1]) { args.amount = Number(argv[++i]) || 1; continue; }
|
| 26 |
+
if (a === '--in-place') { args.inPlace = true; continue; }
|
| 27 |
+
}
|
| 28 |
+
if (!args.in) throw new Error('--in is required');
|
| 29 |
+
if (args.inPlace) args.out = args.in;
|
| 30 |
+
if (!args.out) {
|
| 31 |
+
const { dir, name, ext } = path.parse(args.in);
|
| 32 |
+
args.out = path.join(dir, `${name}.jitter${ext || '.csv'}`);
|
| 33 |
+
}
|
| 34 |
+
return args;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
function mulberry32(seed){
|
| 38 |
+
let t = seed >>> 0;
|
| 39 |
+
return function(){
|
| 40 |
+
t += 0x6D2B79F5;
|
| 41 |
+
let r = Math.imul(t ^ (t >>> 15), 1 | t);
|
| 42 |
+
r ^= r + Math.imul(r ^ (r >>> 7), 61 | r);
|
| 43 |
+
return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
|
| 44 |
+
};
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
function makeRng(seed){
|
| 48 |
+
if (Number.isFinite(seed)) return mulberry32(seed);
|
| 49 |
+
return Math.random;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
function randn(rng){
|
| 53 |
+
// Box-Muller transform
|
| 54 |
+
let u = 0, v = 0;
|
| 55 |
+
while (u === 0) u = rng();
|
| 56 |
+
while (v === 0) v = rng();
|
| 57 |
+
return Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
function jitterValue(metric, value, amount, rng){
|
| 61 |
+
const m = metric.toLowerCase();
|
| 62 |
+
if (m === 'epoch') return value; // keep as-is
|
| 63 |
+
if (m.includes('accuracy')){
|
| 64 |
+
const n = Math.max(-0.02 * amount, Math.min(0.02 * amount, randn(rng) * 0.01 * amount));
|
| 65 |
+
return Math.max(0, Math.min(1, value + n));
|
| 66 |
+
}
|
| 67 |
+
if (m.includes('loss')){
|
| 68 |
+
const n = Math.max(-0.03 * amount, Math.min(0.03 * amount, randn(rng) * 0.01 * amount));
|
| 69 |
+
return Math.max(0, value + n);
|
| 70 |
+
}
|
| 71 |
+
// default: tiny noise
|
| 72 |
+
const n = Math.max(-0.01 * amount, Math.min(0.01 * amount, randn(rng) * 0.005 * amount));
|
| 73 |
+
return value + n;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
function formatNumberLike(original, value){
|
| 77 |
+
const s = String(original);
|
| 78 |
+
const dot = s.indexOf('.');
|
| 79 |
+
const decimals = dot >= 0 ? (s.length - dot - 1) : 0;
|
| 80 |
+
if (!Number.isFinite(value)) return s;
|
| 81 |
+
if (decimals <= 0) return String(Math.round(value));
|
| 82 |
+
return value.toFixed(decimals);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
async function main(){
|
| 86 |
+
const args = parseArgs(process.argv);
|
| 87 |
+
const rng = makeRng(args.seed);
|
| 88 |
+
const raw = await fs.readFile(args.in, 'utf8');
|
| 89 |
+
const lines = raw.split(/\r?\n/);
|
| 90 |
+
const out = new Array(lines.length);
|
| 91 |
+
|
| 92 |
+
for (let i = 0; i < lines.length; i++){
|
| 93 |
+
const line = lines[i];
|
| 94 |
+
if (!line || line.trim().length === 0) { out[i] = line; continue; }
|
| 95 |
+
if (/^\s*#/.test(line)) { out[i] = line; continue; }
|
| 96 |
+
|
| 97 |
+
// Preserve header line unmodified
|
| 98 |
+
if (i === 0 && /^\s*run\s*,\s*step\s*,\s*metric\s*,\s*value\s*,\s*stderr\s*$/i.test(line)) {
|
| 99 |
+
out[i] = line; continue;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
const cols = line.split(',');
|
| 103 |
+
if (cols.length < 4) { out[i] = line; continue; }
|
| 104 |
+
|
| 105 |
+
const [run, stepStr, metric, valueStr, stderrStr = ''] = cols;
|
| 106 |
+
const trimmedMetric = (metric || '').trim();
|
| 107 |
+
const valueNum = Number((valueStr || '').trim());
|
| 108 |
+
|
| 109 |
+
if (!Number.isFinite(valueNum)) { out[i] = line; continue; }
|
| 110 |
+
|
| 111 |
+
const jittered = jitterValue(trimmedMetric, valueNum, args.amount, rng);
|
| 112 |
+
const valueOut = formatNumberLike(valueStr, jittered);
|
| 113 |
+
|
| 114 |
+
// Reassemble with original column count and positions
|
| 115 |
+
const result = [run, stepStr, metric, valueOut, stderrStr].join(',');
|
| 116 |
+
out[i] = result;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
const finalText = out.join('\n');
|
| 120 |
+
await fs.writeFile(args.out, finalText, 'utf8');
|
| 121 |
+
const relIn = path.relative(process.cwd(), args.in);
|
| 122 |
+
const relOut = path.relative(process.cwd(), args.out);
|
| 123 |
+
console.log(`Jittered data written: ${relOut} (from ${relIn})`);
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
main().catch(err => {
|
| 127 |
+
console.error(err?.stack || String(err));
|
| 128 |
+
process.exit(1);
|
| 129 |
+
});
|
app/scripts/latex-importer/README.md
ADDED
|
@@ -0,0 +1,169 @@
|
| 1 |
+
# LaTeX Importer
|
| 2 |
+
|
| 3 |
+
Complete LaTeX to MDX (Markdown + JSX) importer optimized for Astro with advanced support for references, interactive equations, and components.
|
| 4 |
+
|
| 5 |
+
## 🚀 Quick Start
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# Complete LaTeX → MDX conversion with all features
|
| 9 |
+
node index.mjs
|
| 10 |
+
|
| 11 |
+
# For step-by-step debugging
|
| 12 |
+
node latex-converter.mjs # LaTeX → Markdown
|
| 13 |
+
node mdx-converter.mjs # Markdown → MDX
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
## 📁 Structure
|
| 17 |
+
|
| 18 |
+
```
|
| 19 |
+
latex-importer/
|
| 20 |
+
├── index.mjs # Complete LaTeX → MDX pipeline
|
| 21 |
+
├── latex-converter.mjs # LaTeX → Markdown with Pandoc
|
| 22 |
+
├── mdx-converter.mjs # Markdown → MDX with Astro components
|
| 23 |
+
├── reference-preprocessor.mjs # LaTeX references cleanup
|
| 24 |
+
├── post-processor.mjs # Markdown post-processing
|
| 25 |
+
├── bib-cleaner.mjs # Bibliography cleaner
|
| 26 |
+
├── filters/
|
| 27 |
+
│ └── equation-ids.lua # Pandoc filter for KaTeX equations
|
| 28 |
+
├── input/ # LaTeX sources
|
| 29 |
+
│ ├── main.tex
|
| 30 |
+
│ ├── main.bib
|
| 31 |
+
│ └── sections/
|
| 32 |
+
└── output/ # Results
|
| 33 |
+
├── main.md # Intermediate Markdown
|
| 34 |
+
└── main.mdx # Final MDX for Astro
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## ✨ Key Features
|
| 38 |
+
|
| 39 |
+
### 🎯 **Smart References**
|
| 40 |
+
- **Invisible anchors**: Automatic conversion of `\label{}` to `<span id="..." style="position: absolute;"></span>`
|
| 41 |
+
- **Clean links**: Identifier cleanup (`:` → `-`, removing prefixes `sec:`, `fig:`, `eq:`)
|
| 42 |
+
- **Cross-references**: Full support for `\ref{}` with functional links
|
| 43 |
+
|
| 44 |
+
### 🧮 **Interactive Equations**
|
| 45 |
+
- **KaTeX IDs**: Conversion of `\label{eq:...}` to `\htmlId{id}{equation}`
|
| 46 |
+
- **Equation references**: Clickable links to mathematical equations
|
| 47 |
+
- **Advanced KaTeX support**: `trust: true` configuration for `\htmlId{}`
|
| 48 |
+
|
| 49 |
+
### 🎨 **Automatic Styling**
|
| 50 |
+
- **Highlights**: `\highlight{text}` → `<span class="highlight">text</span>`
|
| 51 |
+
- **Auto cleanup**: Removal of numbering `(1)`, `(2)`, etc.
|
| 52 |
+
- **Astro components**: Images → `Figure` with automatic imports
|
| 53 |
+
|
| 54 |
+
### 🔧 **Robust Pipeline**
|
| 55 |
+
- **LaTeX preprocessor**: Reference cleanup before Pandoc
|
| 56 |
+
- **Lua filter**: Equation processing in Pandoc AST
|
| 57 |
+
- **Post-processor**: Markdown cleanup and optimization
|
| 58 |
+
- **MDX converter**: Final transformation with Astro components
|
| 59 |
+
|
| 60 |
+
## 📊 Example Workflow
|
| 61 |
+
|
| 62 |
+
```bash
|
| 63 |
+
# 1. Prepare LaTeX sources
|
| 64 |
+
cp my-paper/* input/
|
| 65 |
+
|
| 66 |
+
# 2. Complete automatic conversion
|
| 67 |
+
node index.mjs
|
| 68 |
+
|
| 69 |
+
# 3. Generated results
|
| 70 |
+
ls output/
|
| 71 |
+
# → main.md (Intermediate Markdown)
|
| 72 |
+
# → main.mdx (Final MDX for Astro)
|
| 73 |
+
# → assets/image/ (extracted images)
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### 📋 Conversion Result
|
| 77 |
+
|
| 78 |
+
The pipeline generates an MDX file optimized for Astro with:
|
| 79 |
+
|
| 80 |
+
```mdx
|
| 81 |
+
---
|
| 82 |
+
title: "Your Article Title"
|
| 83 |
+
description: "Generated from LaTeX"
|
| 84 |
+
---
|
| 85 |
+
|
| 86 |
+
import Figure from '../components/Figure.astro';
|
| 87 |
+
import figure1 from '../assets/image/figure1.png';
|
| 88 |
+
|
| 89 |
+
## Section with invisible anchor
|
| 90 |
+
<span id="introduction" style="position: absolute;"></span>
|
| 91 |
+
|
| 92 |
+
Here is some text with <span class="highlight">highlighted words</span>.
|
| 93 |
+
|
| 94 |
+
Reference to an interactive [equation](#equation-name).
|
| 95 |
+
|
| 96 |
+
Equation with KaTeX ID:
|
| 97 |
+
$$\htmlId{equation-name}{E = mc^2}$$
|
| 98 |
+
|
| 99 |
+
<Figure src={figure1} alt="Description" />
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## ⚙️ Required Astro Configuration
|
| 103 |
+
|
| 104 |
+
To use equations with IDs, add to `astro.config.mjs`:
|
| 105 |
+
|
| 106 |
+
```javascript
|
| 107 |
+
import rehypeKatex from 'rehype-katex';
|
| 108 |
+
|
| 109 |
+
export default defineConfig({
|
| 110 |
+
markdown: {
|
| 111 |
+
rehypePlugins: [
|
| 112 |
+
[rehypeKatex, { trust: true }], // ← Important for \htmlId{}
|
| 113 |
+
],
|
| 114 |
+
},
|
| 115 |
+
});
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
## 🛠️ Prerequisites
|
| 119 |
+
|
| 120 |
+
- **Node.js** with ESM support
|
| 121 |
+
- **Pandoc** (`brew install pandoc`)
|
| 122 |
+
- **Astro** to use the generated MDX
|
| 123 |
+
|
| 124 |
+
## 🎯 Technical Architecture
|
| 125 |
+
|
| 126 |
+
### 4-Stage Pipeline
|
| 127 |
+
|
| 128 |
+
1. **LaTeX Preprocessing** (`reference-preprocessor.mjs`)
|
| 129 |
+
- Cleanup of `\label{}` and `\ref{}`
|
| 130 |
+
- Conversion `\highlight{}` → CSS spans
|
| 131 |
+
- Removal of prefixes and problematic characters
|
| 132 |
+
|
| 133 |
+
2. **Pandoc + Lua Filter** (`equation-ids.lua`)
|
| 134 |
+
- LaTeX → Markdown conversion with `gfm+tex_math_dollars+raw_html`
|
| 135 |
+
- Equation processing: `\label{eq:name}` → `\htmlId{name}{equation}`
|
| 136 |
+
- Automatic image extraction
|
| 137 |
+
|
| 138 |
+
3. **Markdown Post-processing** (`post-processor.mjs`)
|
| 139 |
+
- KaTeX, Unicode, grouping commands cleanup
|
| 140 |
+
- Attribute correction with `:`
|
| 141 |
+
- Code snippet injection
|
| 142 |
+
|
| 143 |
+
4. **MDX Conversion** (`mdx-converter.mjs`)
|
| 144 |
+
- Images transformation → `Figure`
|
| 145 |
+
- HTML span escaping correction
|
| 146 |
+
- Automatic imports generation
|
| 147 |
+
- MDX frontmatter
|
| 148 |
+
|
| 149 |
+
## 📊 Conversion Statistics
|
| 150 |
+
|
| 151 |
+
For a typical scientific document:
|
| 152 |
+
- **87 labels** detected and processed
|
| 153 |
+
- **48 invisible anchors** created
|
| 154 |
+
- **13 highlight spans** with CSS class
|
| 155 |
+
- **4 equations** with `\htmlId{}` KaTeX
|
| 156 |
+
- **40 images** converted to components
|
| 157 |
+
|
| 158 |
+
## ✅ Project Status
|
| 159 |
+
|
| 160 |
+
### 🎉 **Complete Features**
|
| 161 |
+
- ✅ **LaTeX → MDX Pipeline**: Full end-to-end functional conversion
|
| 162 |
+
- ✅ **Cross-document references**: Perfectly functional internal links
|
| 163 |
+
- ✅ **Interactive equations**: KaTeX support with clickable IDs
|
| 164 |
+
- ✅ **Automatic styling**: Highlights and Astro components
|
| 165 |
+
- ✅ **Robustness**: Automatic cleanup of all escaping
|
| 166 |
+
- ✅ **Optimization**: Clean code without unnecessary elements
|
| 167 |
+
|
| 168 |
+
### 🚀 **Production Ready**
|
| 169 |
+
The toolkit is now **100% operational** for converting complex scientific LaTeX documents to MDX/Astro with all advanced features (references, interactive equations, styling).
|
app/scripts/latex-importer/bib-cleaner.mjs
ADDED
|
@@ -0,0 +1,104 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
| 4 |
+
import { join, dirname, basename } from 'path';
|
| 5 |
+
|
| 6 |
+
/**
|
| 7 |
+
* Clean a BibTeX file by removing local file references and paths
|
| 8 |
+
* @param {string} inputBibFile - Path to the input .bib file
|
| 9 |
+
* @param {string} outputBibFile - Path to the output cleaned .bib file
|
| 10 |
+
* @returns {boolean} - Success status
|
| 11 |
+
*/
|
| 12 |
+
export function cleanBibliography(inputBibFile, outputBibFile) {
|
| 13 |
+
if (!existsSync(inputBibFile)) {
|
| 14 |
+
console.log(' ⚠️ No bibliography file found:', inputBibFile);
|
| 15 |
+
return false;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
console.log('📚 Cleaning bibliography...');
|
| 19 |
+
let bibContent = readFileSync(inputBibFile, 'utf8');
|
| 20 |
+
|
| 21 |
+
// Remove file paths and local references
|
| 22 |
+
bibContent = bibContent.replace(/file = \{[^}]+\}/g, '');
|
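// Illustrative (made-up) example: an entry field such as
//   file = {smith2020.pdf:papers/smith2020.pdf:application/pdf},
// is stripped entirely; the leftover commas and blank lines are tidied up below.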
| 23 |
+
|
| 24 |
+
// Remove empty lines created by file removal
|
| 25 |
+
bibContent = bibContent.replace(/,\s*\n\s*\n/g, '\n\n');
|
| 26 |
+
bibContent = bibContent.replace(/,\s*\}/g, '\n}');
|
| 27 |
+
|
| 28 |
+
// Clean up double commas
|
| 29 |
+
bibContent = bibContent.replace(/,,/g, ',');
|
| 30 |
+
|
| 31 |
+
// Remove trailing commas before closing braces
|
| 32 |
+
bibContent = bibContent.replace(/,(\s*\n\s*)\}/g, '$1}');
|
| 33 |
+
|
| 34 |
+
writeFileSync(outputBibFile, bibContent);
|
| 35 |
+
console.log(` 📄 Clean bibliography saved: ${outputBibFile}`);
|
| 36 |
+
|
| 37 |
+
return true;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
/**
|
| 41 |
+
* CLI for bibliography cleaning
|
| 42 |
+
*/
|
| 43 |
+
function main() {
|
| 44 |
+
const args = process.argv.slice(2);
|
| 45 |
+
|
| 46 |
+
if (args.includes('--help') || args.includes('-h')) {
|
| 47 |
+
console.log(`
|
| 48 |
+
📚 BibTeX Bibliography Cleaner
|
| 49 |
+
|
| 50 |
+
Usage:
|
| 51 |
+
node bib-cleaner.mjs [input.bib] [output.bib]
|
| 52 |
+
node bib-cleaner.mjs --input=input.bib --output=output.bib
|
| 53 |
+
|
| 54 |
+
Options:
|
| 55 |
+
--input=FILE Input .bib file
|
| 56 |
+
--output=FILE Output cleaned .bib file
|
| 57 |
+
--help, -h Show this help
|
| 58 |
+
|
| 59 |
+
Examples:
|
| 60 |
+
# Clean main.bib to clean.bib
|
| 61 |
+
node bib-cleaner.mjs main.bib clean.bib
|
| 62 |
+
|
| 63 |
+
# Using flags
|
| 64 |
+
node bib-cleaner.mjs --input=references.bib --output=clean-refs.bib
|
| 65 |
+
`);
|
| 66 |
+
process.exit(0);
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
let inputFile, outputFile;
|
| 70 |
+
|
| 71 |
+
// Parse command line arguments
|
| 72 |
+
if (args.length >= 2 && !args[0].startsWith('--')) {
|
| 73 |
+
// Positional arguments
|
| 74 |
+
inputFile = args[0];
|
| 75 |
+
outputFile = args[1];
|
| 76 |
+
} else {
|
| 77 |
+
// Named arguments
|
| 78 |
+
for (const arg of args) {
|
| 79 |
+
if (arg.startsWith('--input=')) {
|
| 80 |
+
inputFile = arg.split('=')[1];
|
| 81 |
+
} else if (arg.startsWith('--output=')) {
|
| 82 |
+
outputFile = arg.split('=')[1];
|
| 83 |
+
}
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
if (!inputFile || !outputFile) {
|
| 88 |
+
console.error('❌ Both input and output files are required');
|
| 89 |
+
console.log('Use --help for usage information');
|
| 90 |
+
process.exit(1);
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
const success = cleanBibliography(inputFile, outputFile);
|
| 94 |
+
if (success) {
|
| 95 |
+
console.log('🎉 Bibliography cleaning completed!');
|
| 96 |
+
} else {
|
| 97 |
+
process.exit(1);
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
// Run CLI if called directly
|
| 102 |
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
| 103 |
+
main();
|
| 104 |
+
}
|
app/scripts/latex-importer/filters/equation-ids.lua
ADDED
|
@@ -0,0 +1,134 @@
|
| 1 |
+
--[[
|
| 2 |
+
Pandoc Lua filter to add IDs to equations using KaTeX \htmlId syntax
|
| 3 |
+
|
| 4 |
+
This filter processes display math equations and inline math that contain
|
| 5 |
+
\label{} commands, and wraps them with \htmlId{clean-id}{content} for KaTeX.
|
| 6 |
+
|
| 7 |
+
Requirements:
|
| 8 |
+
- KaTeX renderer with trust: true option
|
| 9 |
+
- Equations with \label{} commands in LaTeX
|
| 10 |
+
--]]
|
| 11 |
+
|
| 12 |
+
-- Function to clean identifier strings (remove prefixes and colons)
|
| 13 |
+
function clean_identifier(id_str)
|
| 14 |
+
if id_str and type(id_str) == "string" then
|
| 15 |
+
-- Remove common prefixes and replace colons with dashes
|
| 16 |
+
local clean = id_str
|
| 17 |
+
:gsub("^(eq|equation):", "") -- Remove eq: prefix
|
| 18 |
+
:gsub(":", "-") -- Replace colons with dashes
|
| 19 |
+
:gsub("[^a-zA-Z0-9_-]", "-") -- Replace other problematic chars
|
| 20 |
+
:gsub("-+", "-") -- Collapse multiple dashes
|
| 21 |
+
:gsub("^-", "") -- Remove leading dash
|
| 22 |
+
:gsub("-$", "") -- Remove trailing dash
|
| 23 |
+
|
| 24 |
+
-- Ensure we don't have empty identifiers
|
| 25 |
+
if clean == "" then
|
| 26 |
+
clean = id_str:gsub(":", "-")
|
| 27 |
+
end
|
| 28 |
+
|
| 29 |
+
return clean
|
| 30 |
+
end
|
| 31 |
+
return id_str
|
| 32 |
+
end
|
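-- Illustrative examples (assuming the prefix handling above):
--   clean_identifier("eq:loss")     --> "loss"
--   clean_identifier("fig:arch.v2") --> "fig-arch-v2"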
| 33 |
+
|
| 34 |
+
-- Process Math elements (both inline and display)
|
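-- Illustrative effect on display math (input made up):
--   "E = mc^2 \label{eq:emc}"   -->   "\htmlId{emc}{E = mc^2}"
-- Align environments are not wrapped; they get a %%ALIGN_ANCHOR_ID{...}%% marker
-- that the Markdown post-processor later turns into an anchor span.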
| 35 |
+
function Math(el)
|
| 36 |
+
local math_content = el.text
|
| 37 |
+
|
| 38 |
+
-- Look for \label{...} commands in the math content
|
| 39 |
+
local label_match = math_content:match("\\label%{([^}]+)%}")
|
| 40 |
+
|
| 41 |
+
if label_match then
|
| 42 |
+
-- Clean the identifier
|
| 43 |
+
local clean_id = clean_identifier(label_match)
|
| 44 |
+
|
| 45 |
+
-- Remove the \label{} command from the math content
|
| 46 |
+
local clean_math = math_content:gsub("\\label%{[^}]+%}", "")
|
| 47 |
+
|
| 48 |
+
-- Clean up any extra whitespace or line breaks that might remain
|
| 49 |
+
clean_math = clean_math:gsub("%s*$", ""):gsub("^%s*", "")
|
| 50 |
+
|
| 51 |
+
-- Handle different equation environments appropriately
|
| 52 |
+
-- For align environments, preserve them as they work with KaTeX
|
| 53 |
+
local has_align = clean_math:match("\\begin%{align%}")
|
| 54 |
+
|
| 55 |
+
if has_align then
|
| 56 |
+
-- For align environments, we keep the structure and add ID as an attribute
|
| 57 |
+
-- KaTeX supports align environments natively
|
| 58 |
+
clean_math = clean_math:gsub("\\begin%{align%}", "\\begin{align}")
|
| 59 |
+
clean_math = clean_math:gsub("\\end%{align%}", "\\end{align}")
|
| 60 |
+
else
|
| 61 |
+
-- Remove other equation environments that don't work well with \htmlId
|
| 62 |
+
clean_math = clean_math:gsub("\\begin%{equation%}", ""):gsub("\\end%{equation%}", "")
|
| 63 |
+
clean_math = clean_math:gsub("\\begin%{equation%*%}", ""):gsub("\\end%{equation%*%}", "")
|
| 64 |
+
clean_math = clean_math:gsub("\\begin%{align%*%}", ""):gsub("\\end%{align%*%}", "")
|
| 65 |
+
end
|
| 66 |
+
|
| 67 |
+
-- Clean up any remaining whitespace
|
| 68 |
+
clean_math = clean_math:gsub("%s*$", ""):gsub("^%s*", "")
|
| 69 |
+
|
| 70 |
+
local new_math
|
| 71 |
+
if has_align then
|
| 72 |
+
-- For align environments, KaTeX doesn't support \htmlId with align
|
| 73 |
+
-- Instead, we add a special marker that the post-processor will convert to a span
|
| 74 |
+
-- This span will serve as an anchor for references
|
| 75 |
+
new_math = "%%ALIGN_ANCHOR_ID{" .. clean_id .. "}%%\n" .. clean_math
|
| 76 |
+
else
|
| 77 |
+
-- For other math, wrap with \htmlId{}
|
| 78 |
+
new_math = "\\htmlId{" .. clean_id .. "}{" .. clean_math .. "}"
|
| 79 |
+
end
|
| 80 |
+
|
| 81 |
+
-- Return new Math element with the updated content
|
| 82 |
+
return pandoc.Math(el.mathtype, new_math)
|
| 83 |
+
end
|
| 84 |
+
|
| 85 |
+
-- Return unchanged if no label found
|
| 86 |
+
return el
|
| 87 |
+
end
|
| 88 |
+
|
| 89 |
+
-- Optional: Process RawInline elements that might contain LaTeX math
|
| 90 |
+
function RawInline(el)
|
| 91 |
+
if el.format == "latex" or el.format == "tex" then
|
| 92 |
+
local content = el.text
|
| 93 |
+
|
| 94 |
+
-- Look for equation environments with labels
|
| 95 |
+
local label_match = content:match("\\label%{([^}]+)%}")
|
| 96 |
+
|
| 97 |
+
if label_match then
|
| 98 |
+
local clean_id = clean_identifier(label_match)
|
| 99 |
+
|
| 100 |
+
-- For raw LaTeX, we might need different handling
|
| 101 |
+
-- This is a simplified approach - adjust based on your needs
|
| 102 |
+
local clean_content = content:gsub("\\label%{[^}]+%}", "")
|
| 103 |
+
|
| 104 |
+
if clean_content:match("\\begin%{equation") or clean_content:match("\\begin%{align") then
|
| 105 |
+
-- For equation environments, we might need to wrap differently
|
| 106 |
+
-- This depends on how your KaTeX setup handles equation environments
|
| 107 |
+
return pandoc.RawInline(el.format, clean_content)
|
| 108 |
+
end
|
| 109 |
+
end
|
| 110 |
+
end
|
| 111 |
+
|
| 112 |
+
return el
|
| 113 |
+
end
|
| 114 |
+
|
| 115 |
+
-- Optional: Process RawBlock elements for display equations
|
| 116 |
+
function RawBlock(el)
|
| 117 |
+
if el.format == "latex" or el.format == "tex" then
|
| 118 |
+
local content = el.text
|
| 119 |
+
|
| 120 |
+
-- Look for equation environments with labels
|
| 121 |
+
local label_match = content:match("\\label%{([^}]+)%}")
|
| 122 |
+
|
| 123 |
+
if label_match then
|
| 124 |
+
local clean_id = clean_identifier(label_match)
|
| 125 |
+
local clean_content = content:gsub("\\label%{[^}]+%}", "")
|
| 126 |
+
|
| 127 |
+
-- For block equations, we might want to preserve the structure
|
| 128 |
+
-- but add the htmlId functionality
|
| 129 |
+
return pandoc.RawBlock(el.format, clean_content)
|
| 130 |
+
end
|
| 131 |
+
end
|
| 132 |
+
|
| 133 |
+
return el
|
| 134 |
+
end
|
app/scripts/latex-importer/index.mjs
ADDED
|
@@ -0,0 +1,138 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
import { join, dirname } from 'path';
|
| 4 |
+
import { fileURLToPath } from 'url';
|
| 5 |
+
import { copyFileSync } from 'fs';
|
| 6 |
+
import { convertLatexToMarkdown } from './latex-converter.mjs';
|
| 7 |
+
import { convertToMdx } from './mdx-converter.mjs';
|
| 8 |
+
import { cleanBibliography } from './bib-cleaner.mjs';
|
| 9 |
+
|
| 10 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 11 |
+
const __dirname = dirname(__filename);
|
| 12 |
+
|
| 13 |
+
// Default configuration
|
| 14 |
+
const DEFAULT_INPUT = join(__dirname, 'input', 'main.tex');
|
| 15 |
+
const DEFAULT_OUTPUT = join(__dirname, 'output');
|
| 16 |
+
const ASTRO_CONTENT_PATH = join(__dirname, '..', '..', 'src', 'content', 'article.mdx');
|
| 17 |
+
|
| 18 |
+
function parseArgs() {
|
| 19 |
+
const args = process.argv.slice(2);
|
| 20 |
+
const config = {
|
| 21 |
+
input: DEFAULT_INPUT,
|
| 22 |
+
output: DEFAULT_OUTPUT,
|
| 23 |
+
clean: false,
|
| 24 |
+
bibOnly: false,
|
| 25 |
+
convertOnly: false,
|
| 26 |
+
mdx: false,
|
| 27 |
+
};
|
| 28 |
+
|
| 29 |
+
for (const arg of args) {
|
| 30 |
+
if (arg.startsWith('--input=')) {
|
| 31 |
+
config.input = arg.split('=')[1];
|
| 32 |
+
} else if (arg.startsWith('--output=')) {
|
| 33 |
+
config.output = arg.split('=')[1];
|
| 34 |
+
} else if (arg === '--clean') {
|
| 35 |
+
config.clean = true;
|
| 36 |
+
} else if (arg === '--bib-only') {
|
| 37 |
+
config.bibOnly = true;
|
| 38 |
+
} else if (arg === '--convert-only') {
|
| 39 |
+
config.convertOnly = true;
|
| 40 |
+
}
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
return config;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
function showHelp() {
|
| 47 |
+
console.log(`
|
| 48 |
+
🚀 LaTeX to Markdown Toolkit
|
| 49 |
+
|
| 50 |
+
Usage:
|
| 51 |
+
node index.mjs [options]
|
| 52 |
+
|
| 53 |
+
Options:
|
| 54 |
+
--input=PATH Input LaTeX file (default: input/main.tex)
|
| 55 |
+
--output=PATH Output directory (default: output/)
|
| 56 |
+
--clean Clean output directory before processing
|
| 57 |
+
--bib-only Only clean bibliography file
|
| 58 |
+
--convert-only Only convert LaTeX to Markdown (skip bib cleaning)
|
| 59 |
+
--help, -h Show this help
|
| 60 |
+
|
| 61 |
+
Examples:
|
| 62 |
+
# Full conversion with bibliography cleaning
|
| 63 |
+
node index.mjs --clean
|
| 64 |
+
|
| 65 |
+
# Only clean bibliography
|
| 66 |
+
node index.mjs --bib-only --input=paper.tex --output=clean/
|
| 67 |
+
|
| 68 |
+
# Only convert LaTeX (use existing clean bibliography)
|
| 69 |
+
node index.mjs --convert-only
|
| 70 |
+
|
| 71 |
+
# Custom paths
|
| 72 |
+
node index.mjs --input=../paper/main.tex --output=../results/ --clean
|
| 73 |
+
`);
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
function main() {
|
| 77 |
+
const args = process.argv.slice(2);
|
| 78 |
+
|
| 79 |
+
if (args.includes('--help') || args.includes('-h')) {
|
| 80 |
+
showHelp();
|
| 81 |
+
process.exit(0);
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
const config = parseArgs();
|
| 85 |
+
|
| 86 |
+
console.log('🚀 LaTeX to Markdown Toolkit');
|
| 87 |
+
console.log('==============================');
|
| 88 |
+
|
| 89 |
+
try {
|
| 90 |
+
if (config.bibOnly) {
|
| 91 |
+
// Only clean bibliography
|
| 92 |
+
console.log('📚 Bibliography cleaning mode');
|
| 93 |
+
const bibInput = config.input.replace('.tex', '.bib');
|
| 94 |
+
const bibOutput = join(config.output, 'main.bib');
|
| 95 |
+
|
| 96 |
+
cleanBibliography(bibInput, bibOutput);
|
| 97 |
+
console.log('🎉 Bibliography cleaning completed!');
|
| 98 |
+
|
| 99 |
+
} else if (config.convertOnly) {
|
| 100 |
+
// Only convert LaTeX
|
| 101 |
+
console.log('📄 Conversion only mode');
|
| 102 |
+
convertLatexToMarkdown(config.input, config.output);
|
| 103 |
+
|
| 104 |
+
} else {
|
| 105 |
+
// Full workflow
|
| 106 |
+
console.log('🔄 Full conversion workflow');
|
| 107 |
+
convertLatexToMarkdown(config.input, config.output);
|
| 108 |
+
|
| 109 |
+
// Convert to MDX if requested
|
| 110 |
+
const markdownFile = join(config.output, 'main.md');
|
| 111 |
+
const mdxFile = join(config.output, 'main.mdx');
|
| 112 |
+
|
| 113 |
+
console.log('📝 Converting Markdown to MDX...');
|
| 114 |
+
convertToMdx(markdownFile, mdxFile);
|
| 115 |
+
|
| 116 |
+
// Copy MDX to Astro content directory
|
| 117 |
+
console.log('📋 Copying MDX to Astro content directory...');
|
| 118 |
+
try {
|
| 119 |
+
copyFileSync(mdxFile, ASTRO_CONTENT_PATH);
|
| 120 |
+
console.log(` ✅ Copied to ${ASTRO_CONTENT_PATH}`);
|
| 121 |
+
} catch (error) {
|
| 122 |
+
console.warn(` ⚠️ Failed to copy MDX to Astro: ${error.message}`);
|
| 123 |
+
}
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
} catch (error) {
|
| 127 |
+
console.error('❌ Error:', error.message);
|
| 128 |
+
process.exit(1);
|
| 129 |
+
}
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
// Export functions for use as module
|
| 133 |
+
export { convertLatexToMarkdown, cleanBibliography };
|
| 134 |
+
|
| 135 |
+
// Run CLI if called directly
|
| 136 |
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
| 137 |
+
main();
|
| 138 |
+
}
|
app/scripts/latex-importer/latex-converter.mjs
ADDED
|
@@ -0,0 +1,330 @@
|
|
#!/usr/bin/env node

import { execSync } from 'child_process';
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { join, dirname, basename } from 'path';
import { fileURLToPath } from 'url';
import { cleanBibliography } from './bib-cleaner.mjs';
import { postProcessMarkdown } from './post-processor.mjs';
import { preprocessLatexReferences } from './reference-preprocessor.mjs';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Configuration
const DEFAULT_INPUT = join(__dirname, 'input', 'main.tex');
const DEFAULT_OUTPUT = join(__dirname, 'output');

function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    input: DEFAULT_INPUT,
    output: DEFAULT_OUTPUT,
    clean: false
  };

  for (const arg of args) {
    if (arg.startsWith('--input=')) {
      config.input = arg.split('=')[1];
    } else if (arg.startsWith('--output=')) {
      config.output = arg.split('=')[1];
    } else if (arg === '--clean') {
      config.clean = true;
    }
  }

  return config;
}

function ensureDirectory(dir) {
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
}

function cleanDirectory(dir) {
  if (existsSync(dir)) {
    execSync(`rm -rf "${dir}"/*`, { stdio: 'inherit' });
  }
}

function preprocessLatexFile(inputFile, outputDir) {
  const inputDir = dirname(inputFile);
  const tempFile = join(outputDir, 'temp_main.tex');

  console.log('🔄 Preprocessing LaTeX file to resolve \\input commands...');

  let content = readFileSync(inputFile, 'utf8');

  // Remove problematic commands that break pandoc
  console.log('🧹 Cleaning problematic LaTeX constructs...');

  // Fix citation issues - but not in citation keys
  content = content.replace(/\$p_0\$(?![A-Za-z])/g, 'p0');

  // Convert complex math environments to simple delimiters
  content = content.replace(/\$\$\\begin\{equation\*\}/g, '$$');
  content = content.replace(/\\end\{equation\*\}\$\$/g, '$$');
  content = content.replace(/\\begin\{equation\*\}/g, '$$');
  content = content.replace(/\\end\{equation\*\}/g, '$$');
  // Keep align environments intact for KaTeX support
  // Protect align environments by temporarily replacing them before cleaning & operators
  const alignBlocks = [];
  content = content.replace(/\\begin\{align\}([\s\S]*?)\\end\{align\}/g, (match, alignContent) => {
    alignBlocks.push(match);
    return `__ALIGN_BLOCK_${alignBlocks.length - 1}__`;
  });

  // Now remove & operators from non-align content (outside align environments)
  content = content.replace(/&=/g, '=');
  content = content.replace(/&/g, '');

  // Restore align blocks with their & operators intact
  alignBlocks.forEach((block, index) => {
    content = content.replace(`__ALIGN_BLOCK_${index}__`, block);
  });

  // Convert LaTeX citations to Pandoc format
  content = content.replace(/\\cite[tp]?\{([^}]+)\}/g, (match, citations) => {
    // Handle multiple citations separated by commas - all become simple @citations
    return citations.split(',').map(cite => `@${cite.trim()}`).join(', ');
  });

  // Handle complex \textsc with nested math - extract and simplify (but not in command definitions)
  content = content.replace(/\\textsc\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, content_inside, offset) => {
    // Skip if this is inside a \newcommand or similar definition
    const before = content.substring(Math.max(0, offset - 50), offset);
    if (before.includes('\\newcommand') || before.includes('\\renewcommand') || before.includes('\\def')) {
      return match; // Keep original
    }

    // Remove math delimiters inside textsc for simplification
    const simplified = content_inside.replace(/\\\([^)]+\\\)/g, 'MATHEXPR');
    return `\\text{${simplified}}`;
  });

  // Remove complex custom commands that pandoc can't handle
  content = content.replace(/\\input\{snippets\/[^}]+\}/g, '% Code snippet removed');

  // Find all \input{} commands (but skip commented ones)
  const inputRegex = /^([^%]*?)\\input\{([^}]+)\}/gm;
  let match;

  while ((match = inputRegex.exec(content)) !== null) {
    const beforeInput = match[1];
    const inputPath = match[2];

    // Skip if the \input is commented (% appears before \input on the line)
    if (beforeInput.includes('%')) {
      continue;
    }
    let fullPath;

    // Skip only problematic files, let Pandoc handle macros
    if (inputPath.includes('snippets/')) {
      console.log(`  Skipping: ${inputPath}`);
      content = content.replace(`\\input{${inputPath}}`, `% Skipped: ${inputPath}`);
      continue;
    }

    // Handle paths with or without .tex extension
    if (inputPath.endsWith('.tex')) {
      fullPath = join(inputDir, inputPath);
    } else {
      fullPath = join(inputDir, inputPath + '.tex');
    }

    if (existsSync(fullPath)) {
      console.log(`  Including: ${inputPath}`);
      let includedContent = readFileSync(fullPath, 'utf8');

      // Clean included content too
      includedContent = includedContent.replace(/\$p_0\$/g, 'p0');
      includedContent = includedContent.replace(/\\input\{snippets\/[^}]+\}/g, '% Code snippet removed');

      // Handle complex \textsc in included content
      includedContent = includedContent.replace(/\\textsc\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, content_inside, offset) => {
        // Skip if this is inside a \newcommand or similar definition
        const before = includedContent.substring(Math.max(0, offset - 50), offset);
        if (before.includes('\\newcommand') || before.includes('\\renewcommand') || before.includes('\\def')) {
          return match; // Keep original
        }

        const simplified = content_inside.replace(/\\\([^)]+\\\)/g, 'MATHEXPR');
        return `\\text{${simplified}}`;
      });

      // Apply same align-preserving logic to included content
      const alignBlocksIncluded = [];
      includedContent = includedContent.replace(/\\begin\{align\}([\s\S]*?)\\end\{align\}/g, (match, alignContent) => {
        alignBlocksIncluded.push(match);
        return `__ALIGN_BLOCK_${alignBlocksIncluded.length - 1}__`;
      });

      // Remove alignment operators from non-align content in included files
      includedContent = includedContent.replace(/&=/g, '=');
      includedContent = includedContent.replace(/&/g, '');

      // Restore align blocks with their & operators intact
      alignBlocksIncluded.forEach((block, index) => {
        includedContent = includedContent.replace(`__ALIGN_BLOCK_${index}__`, block);
      });

      // Convert math environments in included content
      includedContent = includedContent.replace(/\$\$\\begin\{equation\*\}/g, '$$');
      includedContent = includedContent.replace(/\\end\{equation\*\}\$\$/g, '$$');
      includedContent = includedContent.replace(/\\begin\{equation\*\}/g, '$$');
      includedContent = includedContent.replace(/\\end\{equation\*\}/g, '$$');

      // Convert citations in included content
      includedContent = includedContent.replace(/\\cite[tp]?\{([^}]+)\}/g, (match, citations) => {
        return citations.split(',').map(cite => `@${cite.trim()}`).join(', ');
      });

      content = content.replace(`\\input{${inputPath}}`, includedContent);
    } else {
      console.log(`  ⚠️ File not found: ${fullPath} (skipping)`);
      content = content.replace(`\\input{${inputPath}}`, `% File not found: ${inputPath}`);
    }
  }

  // Apply reference preprocessing AFTER input inclusion to ensure all references are captured
  console.log('🔧 Preprocessing LaTeX references for MDX compatibility...');
  const referenceResult = preprocessLatexReferences(content);
  content = referenceResult.content;

  // Write the preprocessed file
  writeFileSync(tempFile, content);
  return tempFile;
}

function processBibliography(inputFile, outputDir) {
  const bibFile = join(dirname(inputFile), 'main.bib');
  const outputBibFile = join(outputDir, 'main.bib');

  if (!existsSync(bibFile)) {
    console.log('  ⚠️ No bibliography file found');
    return null;
  }

  const success = cleanBibliography(bibFile, outputBibFile);
  return success ? outputBibFile : null;
}

export function convertLatexToMarkdown(inputFile, outputDir) {
  console.log('🚀 Simple LaTeX to Markdown Converter');
  console.log(`📁 Input: ${inputFile}`);
  console.log(`📁 Output: ${outputDir}`);

  // Check if input file exists
  if (!existsSync(inputFile)) {
    console.error(`❌ Input file not found: ${inputFile}`);
    process.exit(1);
  }

  // Ensure output directory exists
  ensureDirectory(outputDir);

  try {
    // Check if pandoc is available
    execSync('pandoc --version', { stdio: 'pipe' });
  } catch (error) {
    console.error('❌ Pandoc not found. Please install it: brew install pandoc');
    process.exit(1);
  }

  // Clean and copy bibliography
  const cleanBibFile = processBibliography(inputFile, outputDir);

  // Preprocess the LaTeX file to resolve \input commands
  const preprocessedFile = preprocessLatexFile(inputFile, outputDir);

  const inputFileName = basename(inputFile, '.tex');
  const outputFile = join(outputDir, `${inputFileName}.md`);

  try {
    console.log('📄 Converting with Pandoc...');

    // Enhanced pandoc conversion - use tex_math_dollars for KaTeX compatibility
    const bibOption = cleanBibFile ? `--bibliography="${cleanBibFile}"` : '';

    // Use gfm+tex_math_dollars for simple $ delimiters compatible with KaTeX
    const mediaDir = join(outputDir, 'assets', 'image');
    ensureDirectory(mediaDir);
    const inputDir = dirname(inputFile);
    const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
    const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars+raw_html --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;

    console.log(`  Running: ${pandocCommand}`);
    execSync(pandocCommand, { stdio: 'pipe' });

    // Clean up temp file
    execSync(`rm "${preprocessedFile}"`, { stdio: 'pipe' });

    // Post-processing to fix KaTeX incompatible constructions
    let markdownContent = readFileSync(outputFile, 'utf8');

    // Use modular post-processor with code injection
    markdownContent = postProcessMarkdown(markdownContent, inputDir);

    writeFileSync(outputFile, markdownContent);

    console.log(`✅ Conversion completed: ${outputFile}`);

    // Show file size
    const stats = execSync(`wc -l "${outputFile}"`, { encoding: 'utf8' });
    const lines = stats.trim().split(' ')[0];
    console.log(`📊 Result: ${lines} lines written`);

  } catch (error) {
    console.error('❌ Pandoc conversion failed:');
    console.error(error.message);
    // Clean up temp file on error
    try {
      execSync(`rm "${preprocessedFile}"`, { stdio: 'pipe' });
    } catch { }
    process.exit(1);
  }
}

function main() {
  const config = parseArgs();

  if (config.clean) {
    console.log('🧹 Cleaning output directory...');
    cleanDirectory(config.output);
  }

  convertLatexToMarkdown(config.input, config.output);

  console.log('🎉 Simple conversion completed!');
}

// Show help if requested
if (process.argv.includes('--help') || process.argv.includes('-h')) {
  console.log(`
🚀 Simple LaTeX to Markdown Converter

Usage:
  node scripts/simple-latex-to-markdown.mjs [options]

Options:
  --input=PATH    Input LaTeX file (default: latex-converter/input-example/main.tex)
  --output=PATH   Output directory (default: output/)
  --clean         Clean output directory before conversion
  --help, -h      Show this help

Examples:
  # Basic conversion
  node scripts/simple-latex-to-markdown.mjs

  # Custom paths
  node scripts/simple-latex-to-markdown.mjs --input=my-paper.tex --output=converted/

  # Clean output first
  node scripts/simple-latex-to-markdown.mjs --clean
`);
  process.exit(0);
}

main();
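The align handling in preprocessLatexFile above follows a protect/strip/restore pattern: stash each align environment behind a placeholder, strip the ampersands everywhere else, then put the originals back. The same idea in a condensed, standalone sketch (the input path and variable names are illustrative, not taken from the diff):

import { readFileSync } from 'fs';

const source = readFileSync('input/main.tex', 'utf8'); // illustrative path
const blocks = [];
let tex = source.replace(/\\begin\{align\}[\s\S]*?\\end\{align\}/g, (m) => {
  blocks.push(m);                                 // stash the align environment
  return `__ALIGN_BLOCK_${blocks.length - 1}__`;  // leave a placeholder behind
});
// With align bodies stashed away, stripping '&' cannot break equation alignment.
tex = tex.replace(/&=/g, '=').replace(/&/g, '');
// Restore the untouched align environments.
blocks.forEach((b, i) => {
  tex = tex.replace(`__ALIGN_BLOCK_${i}__`, b);
});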
app/scripts/latex-importer/mdx-converter.mjs
ADDED
@@ -0,0 +1,896 @@
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
| 4 |
+
import { join, dirname, basename, extname } from 'path';
|
| 5 |
+
import { fileURLToPath } from 'url';
|
| 6 |
+
import { extractAndGenerateFrontmatter } from './metadata-extractor.mjs';
|
| 7 |
+
|
| 8 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 9 |
+
const __dirname = dirname(__filename);
|
| 10 |
+
|
| 11 |
+
// Configuration
|
| 12 |
+
const DEFAULT_INPUT = join(__dirname, 'output', 'main.md');
|
| 13 |
+
const DEFAULT_OUTPUT = join(__dirname, 'output', 'main.mdx');
|
| 14 |
+
|
| 15 |
+
function parseArgs() {
|
| 16 |
+
const args = process.argv.slice(2);
|
| 17 |
+
const config = {
|
| 18 |
+
input: DEFAULT_INPUT,
|
| 19 |
+
output: DEFAULT_OUTPUT,
|
| 20 |
+
};
|
| 21 |
+
|
| 22 |
+
for (const arg of args) {
|
| 23 |
+
if (arg.startsWith('--input=')) {
|
| 24 |
+
config.input = arg.substring('--input='.length);
|
| 25 |
+
} else if (arg.startsWith('--output=')) {
|
| 26 |
+
config.output = arg.substring('--output='.length);
|
| 27 |
+
} else if (arg === '--help' || arg === '-h') {
|
| 28 |
+
console.log(`
|
| 29 |
+
📝 Markdown to MDX Converter
|
| 30 |
+
|
| 31 |
+
Usage:
|
| 32 |
+
node mdx-converter.mjs [options]
|
| 33 |
+
|
| 34 |
+
Options:
|
| 35 |
+
--input=PATH Input Markdown file (default: ${DEFAULT_INPUT})
|
| 36 |
+
--output=PATH Output MDX file (default: ${DEFAULT_OUTPUT})
|
| 37 |
+
--help, -h Show this help
|
| 38 |
+
|
| 39 |
+
Examples:
|
| 40 |
+
# Basic conversion
|
| 41 |
+
node mdx-converter.mjs
|
| 42 |
+
|
| 43 |
+
# Custom paths
|
| 44 |
+
node mdx-converter.mjs --input=article.md --output=article.mdx
|
| 45 |
+
`);
|
| 46 |
+
process.exit(0);
|
| 47 |
+
} else if (!config.input) {
|
| 48 |
+
config.input = arg;
|
| 49 |
+
} else if (!config.output) {
|
| 50 |
+
config.output = arg;
|
| 51 |
+
}
|
| 52 |
+
}
|
| 53 |
+
return config;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/**
|
| 57 |
+
* Modular MDX post-processing functions for Astro compatibility
|
| 58 |
+
* Each function handles a specific type of transformation
|
| 59 |
+
*/
|
| 60 |
+
|
| 61 |
+
/**
|
| 62 |
+
* Track which Astro components are used during transformations
|
| 63 |
+
*/
|
| 64 |
+
const usedComponents = new Set();
|
| 65 |
+
|
| 66 |
+
/**
|
| 67 |
+
* Track individual image imports needed
|
| 68 |
+
*/
|
| 69 |
+
const imageImports = new Map(); // src -> varName
|
| 70 |
+
|
| 71 |
+
/**
|
| 72 |
+
* Add required component imports to the frontmatter
|
| 73 |
+
* @param {string} content - MDX content
|
| 74 |
+
* @returns {string} - Content with component imports
|
| 75 |
+
*/
|
| 76 |
+
/**
|
| 77 |
+
* Generate a variable name from image path
|
| 78 |
+
* @param {string} src - Image source path
|
| 79 |
+
* @returns {string} - Valid variable name
|
| 80 |
+
*/
|
| 81 |
+
function generateImageVarName(src) {
|
| 82 |
+
// Extract filename without extension and make it a valid JS variable
|
| 83 |
+
const filename = src.split('/').pop().replace(/\.[^.]+$/, '');
|
| 84 |
+
return filename.replace(/[^a-zA-Z0-9]/g, '_').replace(/^[0-9]/, 'img_$&');
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
function addComponentImports(content) {
|
| 88 |
+
console.log(' 📦 Adding component and image imports...');
|
| 89 |
+
|
| 90 |
+
let imports = [];
|
| 91 |
+
|
| 92 |
+
// Add component imports
|
| 93 |
+
if (usedComponents.size > 0) {
|
| 94 |
+
const componentImports = Array.from(usedComponents)
|
| 95 |
+
.map(component => `import ${component} from '../components/${component}.astro';`);
|
| 96 |
+
imports.push(...componentImports);
|
| 97 |
+
console.log(` ✅ Importing components: ${Array.from(usedComponents).join(', ')}`);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
// Add image imports
|
| 101 |
+
if (imageImports.size > 0) {
|
| 102 |
+
const imageImportStatements = Array.from(imageImports.entries())
|
| 103 |
+
.map(([src, varName]) => `import ${varName} from '${src}';`);
|
| 104 |
+
imports.push(...imageImportStatements);
|
| 105 |
+
console.log(` ✅ Importing ${imageImports.size} image(s)`);
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
if (imports.length === 0) {
|
| 109 |
+
console.log(' ℹ️ No imports needed');
|
| 110 |
+
return content;
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
const importBlock = imports.join('\n');
|
| 114 |
+
|
| 115 |
+
// Insert imports after frontmatter
|
| 116 |
+
const frontmatterEnd = content.indexOf('---', 3) + 3;
|
| 117 |
+
if (frontmatterEnd > 2) {
|
| 118 |
+
return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n' + content.slice(frontmatterEnd);
|
| 119 |
+
} else {
|
| 120 |
+
// No frontmatter, add at beginning
|
| 121 |
+
return importBlock + '\n\n' + content;
|
| 122 |
+
}
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
/**
|
| 127 |
+
* Convert grouped figures (subfigures) to MultiFigure components
|
| 128 |
+
* @param {string} content - MDX content
|
| 129 |
+
* @returns {string} - Content with MultiFigure components for grouped figures
|
| 130 |
+
*/
|
| 131 |
+
function convertSubfiguresToMultiFigure(content) {
|
| 132 |
+
console.log(' 🖼️✨ Converting subfigures to MultiFigure components...');
|
| 133 |
+
|
| 134 |
+
let convertedCount = 0;
|
| 135 |
+
|
| 136 |
+
// Pattern to match: <figure> containing multiple <figure> elements with a global caption
|
| 137 |
+
// This matches the LaTeX subfigure pattern that gets converted by Pandoc
|
| 138 |
+
const subfigureGroupPattern = /<figure>\s*((?:<figure>[\s\S]*?<\/figure>\s*){2,})<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;
|
| 139 |
+
|
| 140 |
+
const convertedContent = content.replace(subfigureGroupPattern, (match, figuresMatch, globalCaption) => {
|
| 141 |
+
convertedCount++;
|
| 142 |
+
|
| 143 |
+
// Extract individual figures within the group
|
| 144 |
+
// This pattern is more flexible to handle variations in HTML structure
|
| 145 |
+
const individualFigurePattern = /<figure>\s*<img src="([^"]*)"[^>]*\/>\s*<p><span id="([^"]*)"[^&]*><\/span><\/p>\s*<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;
|
| 146 |
+
|
| 147 |
+
const images = [];
|
| 148 |
+
let figureMatch;
|
| 149 |
+
|
| 150 |
+
while ((figureMatch = individualFigurePattern.exec(figuresMatch)) !== null) {
|
| 151 |
+
const [, src, id, caption] = figureMatch;
|
| 152 |
+
|
| 153 |
+
// Clean the source path (similar to existing transformImages function)
|
| 154 |
+
const cleanSrc = src.replace(/.*\/output\/assets\//, './assets/')
|
| 155 |
+
.replace(/\/Users\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/app\/scripts\/latex-to-markdown\/output\/assets\//, './assets/');
|
| 156 |
+
|
| 157 |
+
// Clean caption text (remove HTML, normalize whitespace)
|
| 158 |
+
const cleanCaption = caption
|
| 159 |
+
.replace(/<[^>]*>/g, '')
|
| 160 |
+
.replace(/\n/g, ' ')
|
| 161 |
+
.replace(/\s+/g, ' ')
|
| 162 |
+
.replace(/'/g, "\\'")
|
| 163 |
+
.trim();
|
| 164 |
+
|
| 165 |
+
// Generate alt text from caption
|
| 166 |
+
const altText = cleanCaption.length > 100
|
| 167 |
+
? cleanCaption.substring(0, 100) + '...'
|
| 168 |
+
: cleanCaption;
|
| 169 |
+
|
| 170 |
+
// Generate variable name for import
|
| 171 |
+
const varName = generateImageVarName(cleanSrc);
|
| 172 |
+
imageImports.set(cleanSrc, varName);
|
| 173 |
+
|
| 174 |
+
images.push({
|
| 175 |
+
src: varName,
|
| 176 |
+
alt: altText,
|
| 177 |
+
caption: cleanCaption,
|
| 178 |
+
id: id
|
| 179 |
+
});
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
// Clean global caption
|
| 183 |
+
const cleanGlobalCaption = globalCaption
|
| 184 |
+
.replace(/<[^>]*>/g, '')
|
| 185 |
+
.replace(/\n/g, ' ')
|
| 186 |
+
.replace(/\s+/g, ' ')
|
| 187 |
+
.replace(/'/g, "\\'")
|
| 188 |
+
.trim();
|
| 189 |
+
|
| 190 |
+
// Mark MultiFigure component as used
|
| 191 |
+
usedComponents.add('MultiFigure');
|
| 192 |
+
|
| 193 |
+
// Determine layout based on number of images
|
| 194 |
+
let layout = 'auto';
|
| 195 |
+
if (images.length === 2) layout = '2-column';
|
| 196 |
+
else if (images.length === 3) layout = '3-column';
|
| 197 |
+
else if (images.length === 4) layout = '4-column';
|
| 198 |
+
|
| 199 |
+
// Generate MultiFigure component
|
| 200 |
+
const imagesJson = images.map(img =>
|
| 201 |
+
` {\n src: ${img.src},\n alt: "${img.alt}",\n caption: "${img.caption}",\n id: "${img.id}"\n }`
|
| 202 |
+
).join(',\n');
|
| 203 |
+
|
| 204 |
+
return `<MultiFigure
|
| 205 |
+
images={[
|
| 206 |
+
${imagesJson}
|
| 207 |
+
]}
|
| 208 |
+
layout="${layout}"
|
| 209 |
+
zoomable
|
| 210 |
+
downloadable
|
| 211 |
+
caption="${cleanGlobalCaption}"
|
| 212 |
+
/>`;
|
| 213 |
+
});
|
| 214 |
+
|
| 215 |
+
if (convertedCount > 0) {
|
| 216 |
+
console.log(` ✅ Converted ${convertedCount} subfigure group(s) to MultiFigure component(s)`);
|
| 217 |
+
} else {
|
| 218 |
+
console.log(' ℹ️ No subfigure groups found');
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
return convertedContent;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
/**
|
| 225 |
+
* Transform images to Figure components
|
| 226 |
+
* @param {string} content - MDX content
|
| 227 |
+
* @returns {string} - Content with Figure components
|
| 228 |
+
*/
|
| 229 |
+
/**
|
| 230 |
+
* Create Figure component with import
|
| 231 |
+
* @param {string} src - Clean image source
|
| 232 |
+
* @param {string} alt - Alt text
|
| 233 |
+
* @param {string} id - Element ID
|
| 234 |
+
* @param {string} caption - Figure caption
|
| 235 |
+
* @param {string} width - Optional width
|
| 236 |
+
* @returns {string} - Figure component markup
|
| 237 |
+
*/
|
| 238 |
+
function createFigureComponent(src, alt = '', id = '', caption = '', width = '') {
|
| 239 |
+
const varName = generateImageVarName(src);
|
| 240 |
+
imageImports.set(src, varName);
|
| 241 |
+
usedComponents.add('Figure');
|
| 242 |
+
|
| 243 |
+
const props = [];
|
| 244 |
+
props.push(`src={${varName}}`);
|
| 245 |
+
props.push('zoomable');
|
| 246 |
+
props.push('downloadable');
|
| 247 |
+
if (id) props.push(`id="${id}"`);
|
| 248 |
+
props.push('layout="fixed"');
|
| 249 |
+
if (alt) props.push(`alt="${alt}"`);
|
| 250 |
+
if (caption) props.push(`caption={'${caption}'}`);
|
| 251 |
+
|
| 252 |
+
return `<Figure\n ${props.join('\n ')}\n/>`;
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
function transformImages(content) {
|
| 256 |
+
console.log(' 🖼️ Transforming images to Figure components with imports...');
|
| 257 |
+
|
| 258 |
+
let hasImages = false;
|
| 259 |
+
|
| 260 |
+
// Helper function to clean source paths
|
| 261 |
+
const cleanSrcPath = (src) => {
|
| 262 |
+
return src.replace(/.*\/output\/assets\//, './assets/')
|
| 263 |
+
.replace(/\/Users\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/app\/scripts\/latex-to-markdown\/output\/assets\//, './assets/');
|
| 264 |
+
};
|
| 265 |
+
|
| 266 |
+
// Helper to clean caption text
|
| 267 |
+
const cleanCaption = (caption) => {
|
| 268 |
+
return caption
|
| 269 |
+
.replace(/<[^>]*>/g, '') // Remove HTML tags
|
| 270 |
+
.replace(/\n/g, ' ') // Replace newlines with spaces
|
| 271 |
+
.replace(/\r/g, ' ') // Replace carriage returns with spaces
|
| 272 |
+
.replace(/\s+/g, ' ') // Replace multiple spaces with single space
|
| 273 |
+
.replace(/'/g, "\\'") // Escape quotes
|
| 274 |
+
.trim(); // Trim whitespace
|
| 275 |
+
};
|
| 276 |
+
|
| 277 |
+
// Helper to clean alt text
|
| 278 |
+
const cleanAltText = (alt, maxLength = 100) => {
|
| 279 |
+
const cleaned = alt
|
| 280 |
+
.replace(/<[^>]*>/g, '') // Remove HTML tags
|
| 281 |
+
.replace(/\n/g, ' ') // Replace newlines with spaces
|
| 282 |
+
.replace(/\r/g, ' ') // Replace carriage returns with spaces
|
| 283 |
+
.replace(/\s+/g, ' ') // Replace multiple spaces with single space
|
| 284 |
+
.trim(); // Trim whitespace
|
| 285 |
+
|
| 286 |
+
return cleaned.length > maxLength
|
| 287 |
+
? cleaned.substring(0, maxLength) + '...'
|
| 288 |
+
: cleaned;
|
| 289 |
+
};
|
| 290 |
+
|
| 291 |
+
// 1. Transform complex HTML figures with style attributes
|
| 292 |
+
content = content.replace(
|
| 293 |
+
/<figure id="([^"]*)">\s*<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*\/>\s*<figcaption>\s*(.*?)\s*<\/figcaption>\s*<\/figure>/gs,
|
| 294 |
+
(match, id, src, style, caption) => {
|
| 295 |
+
const cleanSrc = cleanSrcPath(src);
|
| 296 |
+
const cleanCap = cleanCaption(caption);
|
| 297 |
+
const altText = cleanAltText(cleanCap);
|
| 298 |
+
hasImages = true;
|
| 299 |
+
|
| 300 |
+
return createFigureComponent(cleanSrc, altText, id, cleanCap);
|
| 301 |
+
}
|
| 302 |
+
);
|
| 303 |
+
|
| 304 |
+
// 2. Transform standalone img tags with style
|
| 305 |
+
content = content.replace(
|
| 306 |
+
/<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*(?:alt="([^"]*)")?\s*\/>/g,
|
| 307 |
+
(match, src, style, alt) => {
|
| 308 |
+
const cleanSrc = cleanSrcPath(src);
|
| 309 |
+
const cleanAlt = cleanAltText(alt || 'Figure');
|
| 310 |
+
hasImages = true;
|
| 311 |
+
|
| 312 |
+
return createFigureComponent(cleanSrc, cleanAlt);
|
| 313 |
+
}
|
| 314 |
+
);
|
| 315 |
+
|
| 316 |
+
// 3. Transform images within wrapfigure divs
|
| 317 |
+
content = content.replace(
|
| 318 |
+
/<div class="wrapfigure">\s*r[\d.]+\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>/gs,
|
| 319 |
+
(match, src) => {
|
| 320 |
+
const cleanSrc = cleanSrcPath(src);
|
| 321 |
+
hasImages = true;
|
| 322 |
+
|
| 323 |
+
return createFigureComponent(cleanSrc, 'Figure');
|
| 324 |
+
}
|
| 325 |
+
);
|
| 326 |
+
|
| 327 |
+
// 4. Transform simple HTML figure/img without style
|
| 328 |
+
content = content.replace(
|
| 329 |
+
/<figure id="([^"]*)">\s*<img src="([^"]*)" \/>\s*<figcaption>\s*(.*?)\s*<\/figcaption>\s*<\/figure>/gs,
|
| 330 |
+
(match, id, src, caption) => {
|
| 331 |
+
const cleanSrc = cleanSrcPath(src);
|
| 332 |
+
const cleanCap = cleanCaption(caption);
|
| 333 |
+
const altText = cleanAltText(cleanCap);
|
| 334 |
+
hasImages = true;
|
| 335 |
+
|
| 336 |
+
return createFigureComponent(cleanSrc, altText, id, cleanCap);
|
| 337 |
+
}
|
| 338 |
+
);
|
| 339 |
+
|
| 340 |
+
// 5. Clean up figures with minipage divs
|
| 341 |
+
content = content.replace(
|
| 342 |
+
/<figure id="([^"]*)">\s*<div class="minipage">\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>\s*<figcaption[^>]*>(.*?)<\/figcaption>\s*<\/figure>/gs,
|
| 343 |
+
(match, id, src, caption) => {
|
| 344 |
+
const cleanSrc = cleanSrcPath(src);
|
| 345 |
+
const cleanCap = cleanCaption(caption);
|
| 346 |
+
const altText = cleanAltText(cleanCap);
|
| 347 |
+
hasImages = true;
|
| 348 |
+
|
| 349 |
+
return createFigureComponent(cleanSrc, altText, id, cleanCap);
|
| 350 |
+
}
|
| 351 |
+
);
|
| 352 |
+
|
| 353 |
+
// 6. Transform Pandoc-style images: {#id attr="value"}
|
| 354 |
+
content = content.replace(
|
| 355 |
+
/!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g,
|
| 356 |
+
(match, alt, src, attributes) => {
|
| 357 |
+
const cleanSrc = cleanSrcPath(src);
|
| 358 |
+
const cleanAlt = cleanAltText(alt || 'Figure');
|
| 359 |
+
hasImages = true;
|
| 360 |
+
|
| 361 |
+
let id = '';
|
| 362 |
+
if (attributes) {
|
| 363 |
+
const idMatch = attributes.match(/#([\w-]+)/);
|
| 364 |
+
if (idMatch) id = idMatch[1];
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
return createFigureComponent(cleanSrc, cleanAlt, id);
|
| 368 |
+
}
|
| 369 |
+
);
|
| 370 |
+
|
| 371 |
+
if (hasImages) {
|
| 372 |
+
console.log(' ✅ Figure components with imports will be created');
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
return content;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
/**
|
| 379 |
+
* Transform HTML spans with style attributes to appropriate components
|
| 380 |
+
* @param {string} content - MDX content
|
| 381 |
+
* @returns {string} - Content with transformed spans
|
| 382 |
+
*/
|
| 383 |
+
function transformStyledSpans(content) {
|
| 384 |
+
console.log(' 🎨 Transforming styled spans...');
|
| 385 |
+
|
| 386 |
+
// Transform HTML spans with style attributes
|
| 387 |
+
content = content.replace(
|
| 388 |
+
/<span style="color: ([^"]+)">(.*?)<\/span>/g,
|
| 389 |
+
(match, color, text) => {
|
| 390 |
+
// Map colors to semantic classes or components
|
| 391 |
+
const colorMap = {
|
| 392 |
+
'hf2': 'text-hf-secondary',
|
| 393 |
+
'hf1': 'text-hf-primary'
|
| 394 |
+
};
|
| 395 |
+
|
| 396 |
+
const className = colorMap[color] || `text-${color}`;
|
| 397 |
+
return `<span class="${className}">${text}</span>`;
|
| 398 |
+
}
|
| 399 |
+
);
|
| 400 |
+
|
| 401 |
+
// Transform markdown spans with style attributes: [text]{style="color: color"}
|
| 402 |
+
content = content.replace(
|
| 403 |
+
/\[([^\]]+)\]\{style="color: ([^"]+)"\}/g,
|
| 404 |
+
(match, text, color) => {
|
| 405 |
+
// Map colors to semantic classes or components
|
| 406 |
+
const colorMap = {
|
| 407 |
+
'hf2': 'text-hf-secondary',
|
| 408 |
+
'hf1': 'text-hf-primary'
|
| 409 |
+
};
|
| 410 |
+
|
| 411 |
+
const className = colorMap[color] || `text-${color}`;
|
| 412 |
+
return `<span class="${className}">${text}</span>`;
|
| 413 |
+
}
|
| 414 |
+
);
|
| 415 |
+
|
| 416 |
+
return content;
|
| 417 |
+
}
|
| 418 |
+
|
| 419 |
+
/**
|
| 420 |
+
* Transform reference links to proper Astro internal links
|
| 421 |
+
* @param {string} content - MDX content
|
| 422 |
+
* @returns {string} - Content with transformed links
|
| 423 |
+
*/
|
| 424 |
+
function fixHtmlEscaping(content) {
|
| 425 |
+
console.log(' 🔧 Fixing HTML escaping in spans...');
|
| 426 |
+
|
| 427 |
+
let fixedCount = 0;
|
| 428 |
+
|
| 429 |
+
// Pattern 1: \<span id="..." style="..."\>\</span\>
|
| 430 |
+
content = content.replace(/\\<span id="([^"]*)" style="([^"]*)"\\>\\<\/span\\>/g, (match, id, style) => {
|
| 431 |
+
fixedCount++;
|
| 432 |
+
// Fix common style issues like "position- absolute;" -> "position: absolute;"
|
| 433 |
+
const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
|
| 434 |
+
return `<span id="${id}" style="${cleanStyle}"></span>`;
|
| 435 |
+
});
|
| 436 |
+
|
| 437 |
+
// Pattern 2: \<span class="..."\>...\</span\>
|
| 438 |
+
content = content.replace(/\\<span class="([^"]*)"\\>([^\\]+)\\<\/span\\>/g, (match, className, text) => {
|
| 439 |
+
fixedCount++;
|
| 440 |
+
// Remove numbering like (1), (2), (3) from highlight spans
|
| 441 |
+
let cleanText = text;
|
| 442 |
+
if (className === 'highlight') {
|
| 443 |
+
cleanText = text.replace(/^\(\d+\)\s*/, '');
|
| 444 |
+
}
|
| 445 |
+
return `<span class="${className}">${cleanText}</span>`;
|
| 446 |
+
});
|
| 447 |
+
|
| 448 |
+
// Pattern 3: HTML-encoded spans in paragraph tags
|
| 449 |
+
// <p><span id="..." style="..."></span></p>
|
| 450 |
+
content = content.replace(/<p><span id="([^"]*)" style="([^"]*)"><\/span><\/p>/g, (match, id, style) => {
|
| 451 |
+
fixedCount++;
|
| 452 |
+
// Fix common style issues like "position- absolute;" -> "position: absolute;"
|
| 453 |
+
const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
|
| 454 |
+
return `<span id="${id}" style="${cleanStyle}"></span>`;
|
| 455 |
+
});
|
| 456 |
+
|
| 457 |
+
// Pattern 4: HTML-encoded spans with class in paragraph tags
|
| 458 |
+
// <p><span class="...">...</span></p>
|
| 459 |
+
content = content.replace(/<p><span class="([^"]*)">([^&]*)<\/span><\/p>/g, (match, className, text) => {
|
| 460 |
+
fixedCount++;
|
| 461 |
+
// Remove numbering like (1), (2), (3) from highlight spans
|
| 462 |
+
let cleanText = text;
|
| 463 |
+
if (className === 'highlight') {
|
| 464 |
+
cleanText = text.replace(/^\(\d+\)\s*/, '');
|
| 465 |
+
}
|
| 466 |
+
return `<span class="${className}">${cleanText}</span>`;
|
| 467 |
+
});
|
| 468 |
+
|
| 469 |
+
if (fixedCount > 0) {
|
| 470 |
+
console.log(` ✅ Fixed ${fixedCount} escaped span(s)`);
|
| 471 |
+
}
|
| 472 |
+
|
| 473 |
+
return content;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
function cleanHighlightNumbering(content) {
|
| 477 |
+
console.log(' 🔢 Removing numbering from highlight spans...');
|
| 478 |
+
|
| 479 |
+
let cleanedCount = 0;
|
| 480 |
+
// Clean numbering from non-escaped highlight spans too
|
| 481 |
+
content = content.replace(/<span class="highlight">(\(\d+\)\s*)([^<]+)<\/span>/g, (match, numbering, text) => {
|
| 482 |
+
cleanedCount++;
|
| 483 |
+
return `<span class="highlight">${text}</span>`;
|
| 484 |
+
});
|
| 485 |
+
|
| 486 |
+
if (cleanedCount > 0) {
|
| 487 |
+
console.log(` ✅ Removed numbering from ${cleanedCount} highlight span(s)`);
|
| 488 |
+
}
|
| 489 |
+
|
| 490 |
+
return content;
|
| 491 |
+
}
|
| 492 |
+
|
| 493 |
+
function transformReferenceLinks(content) {
|
| 494 |
+
console.log(' 🔗 Transforming reference links...');
|
| 495 |
+
|
| 496 |
+
// Transform Pandoc reference links: [text](#ref){reference-type="ref" reference="ref"}
|
| 497 |
+
return content.replace(
|
| 498 |
+
/\[([^\]]+)\]\((#[^)]+)\)\{[^}]*reference[^}]*\}/g,
|
| 499 |
+
(match, text, href) => {
|
| 500 |
+
return `[${text}](${href})`;
|
| 501 |
+
}
|
| 502 |
+
);
|
| 503 |
+
}
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
/**
|
| 507 |
+
* Fix frontmatter and ensure proper MDX format
|
| 508 |
+
* @param {string} content - MDX content
|
| 509 |
+
* @param {string} latexContent - Original LaTeX content for metadata extraction
|
| 510 |
+
* @returns {string} - Content with proper frontmatter
|
| 511 |
+
*/
|
| 512 |
+
function ensureFrontmatter(content, latexContent = '') {
|
| 513 |
+
console.log(' 📄 Ensuring proper frontmatter...');
|
| 514 |
+
|
| 515 |
+
if (!content.startsWith('---')) {
|
| 516 |
+
let frontmatter;
|
| 517 |
+
|
| 518 |
+
if (latexContent) {
|
| 519 |
+
// Extract metadata from LaTeX using dedicated module
|
| 520 |
+
frontmatter = extractAndGenerateFrontmatter(latexContent);
|
| 521 |
+
console.log(' ✅ Generated frontmatter from LaTeX metadata');
|
| 522 |
+
} else {
|
| 523 |
+
// Fallback frontmatter
|
| 524 |
+
const currentDate = new Date().toLocaleDateString('en-US', {
|
| 525 |
+
year: 'numeric',
|
| 526 |
+
month: 'short',
|
| 527 |
+
day: '2-digit'
|
| 528 |
+
});
|
| 529 |
+
frontmatter = `---
|
| 530 |
+
title: "Research Article"
|
| 531 |
+
published: "${currentDate}"
|
| 532 |
+
tableOfContentsAutoCollapse: true
|
| 533 |
+
---
|
| 534 |
+
|
| 535 |
+
`;
|
| 536 |
+
console.log(' ✅ Generated basic frontmatter');
|
| 537 |
+
}
|
| 538 |
+
|
| 539 |
+
return frontmatter + content;
|
| 540 |
+
}
|
| 541 |
+
|
| 542 |
+
return content;
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
+
/**
|
| 546 |
+
* Fix mixed math delimiters like $`...`$ or `...`$
|
| 547 |
+
* @param {string} content - MDX content
|
| 548 |
+
* @returns {string} - Content with fixed math delimiters
|
| 549 |
+
*/
|
| 550 |
+
function fixMixedMathDelimiters(content) {
|
| 551 |
+
console.log(' 🔧 Fixing mixed math delimiters...');
|
| 552 |
+
|
| 553 |
+
let fixedCount = 0;
|
| 554 |
+
|
| 555 |
+
// Fix patterns like $`...`$ (mixed delimiters)
|
| 556 |
+
content = content.replace(/\$`([^`]*)`\$/g, (match, mathContent) => {
|
| 557 |
+
fixedCount++;
|
| 558 |
+
return `$${mathContent}$`;
|
| 559 |
+
});
|
| 560 |
+
|
| 561 |
+
// Fix patterns like `...`$ (backtick start, dollar end)
|
| 562 |
+
content = content.replace(/`([^`]*)`\$/g, (match, mathContent) => {
|
| 563 |
+
fixedCount++;
|
| 564 |
+
return `$${mathContent}$`;
|
| 565 |
+
});
|
| 566 |
+
|
| 567 |
+
// Fix patterns like $`...` (dollar start, backtick end - less common)
|
| 568 |
+
content = content.replace(/\$`([^`]*)`(?!\$)/g, (match, mathContent) => {
|
| 569 |
+
fixedCount++;
|
| 570 |
+
return `$${mathContent}$`;
|
| 571 |
+
});
|
| 572 |
+
|
| 573 |
+
if (fixedCount > 0) {
|
| 574 |
+
console.log(` ✅ Fixed ${fixedCount} mixed math delimiter(s)`);
|
| 575 |
+
}
|
| 576 |
+
|
| 577 |
+
return content;
|
| 578 |
+
}
|
| 579 |
+
|
| 580 |
+
/**
|
| 581 |
+
* Clean up orphaned math delimiters and fix mixed content
|
| 582 |
+
* @param {string} content - MDX content
|
| 583 |
+
* @returns {string} - Content with cleaned math blocks
|
| 584 |
+
*/
|
| 585 |
+
function cleanOrphanedMathDelimiters(content) {
|
| 586 |
+
console.log(' 🧹 Cleaning orphaned math delimiters...');
|
| 587 |
+
console.log(' 🔍 Content length:', content.length, 'chars');
|
| 588 |
+
|
| 589 |
+
let fixedCount = 0;
|
| 590 |
+
|
| 591 |
+
// Fix orphaned $$ that are alone on lines (but not part of display math blocks)
|
| 592 |
+
// Only remove $$ that appear alone without corresponding closing $$
|
| 593 |
+
content = content.replace(/^\$\$\s*$(?!\s*[\s\S]*?\$\$)/gm, () => {
|
| 594 |
+
fixedCount++;
|
| 595 |
+
return '';
|
| 596 |
+
});
|
| 597 |
+
|
| 598 |
+
// Fix backticks inside $$....$$ blocks (Pandoc artifact)
|
| 599 |
+
const mathMatches = content.match(/\$\$([\s\S]*?)\$\$/g);
|
| 600 |
+
console.log(` 🔍 Found ${mathMatches ? mathMatches.length : 0} math blocks`);
|
| 601 |
+
|
| 602 |
+
content = content.replace(/\$\$([\s\S]*?)\$\$/g, (match, mathContent) => {
|
| 603 |
+
// More aggressive: remove ALL single backticks in math blocks (they shouldn't be there)
|
| 604 |
+
let cleanedMath = mathContent;
|
| 605 |
+
|
| 606 |
+
// Count backticks before
|
| 607 |
+
const backticksBefore = (mathContent.match(/`/g) || []).length;
|
| 608 |
+
|
| 609 |
+
if (backticksBefore > 0) {
|
| 610 |
+
console.log(` 🔧 Found math block with ${backticksBefore} backtick(s)`);
|
| 611 |
+
}
|
| 612 |
+
|
| 613 |
+
// Remove all isolated backticks (not in pairs)
|
| 614 |
+
cleanedMath = cleanedMath.replace(/`/g, '');
|
| 615 |
+
|
| 616 |
+
const backticksAfter = (cleanedMath.match(/`/g) || []).length;
|
| 617 |
+
|
| 618 |
+
if (backticksBefore > 0) {
|
| 619 |
+
fixedCount++;
|
| 620 |
+
console.log(` 🔧 Removed ${backticksBefore} backtick(s) from math block`);
|
| 621 |
+
return `$$${cleanedMath}$$`;
|
| 622 |
+
}
|
| 623 |
+
return match;
|
| 624 |
+
});
|
| 625 |
+
|
| 626 |
+
// Fix escaped align in math blocks: \begin{align} -> \begin{align}
|
| 627 |
+
content = content.replace(/\\begin\{align\}/g, (match) => {
|
| 628 |
+
fixedCount++;
|
| 629 |
+
return '\\begin{align}';
|
| 630 |
+
});
|
| 631 |
+
|
| 632 |
+
content = content.replace(/\\end\{align\}/g, (match) => {
|
| 633 |
+
fixedCount++;
|
| 634 |
+
return '\\end{align}';
|
| 635 |
+
});
|
| 636 |
+
|
| 637 |
+
// Fix cases where text gets mixed with math blocks
|
| 638 |
+
// Pattern: ``` math ... ``` text ``` math
|
| 639 |
+
content = content.replace(/``` math\s*\n([\s\S]*?)\n```\s*([^`\n]*?)\s*``` math/g, (match, math1, text, math2) => {
|
| 640 |
+
if (text.trim().length > 0 && !text.includes('```')) {
|
| 641 |
+
fixedCount++;
|
| 642 |
+
return '```' + ' math\n' + math1 + '\n```\n\n' + text.trim() + '\n\n```' + ' math';
|
| 643 |
+
}
|
| 644 |
+
return match;
|
| 645 |
+
});
|
| 646 |
+
|
| 647 |
+
if (fixedCount > 0) {
|
| 648 |
+
console.log(` ✅ Fixed ${fixedCount} orphaned math delimiter(s)`);
|
| 649 |
+
}
|
| 650 |
+
|
| 651 |
+
return content;
|
| 652 |
+
}
|
| 653 |
+
|
| 654 |
+
/**
|
| 655 |
+
* Clean newlines from single-dollar math blocks ($...$) ONLY
|
| 656 |
+
* @param {string} content - MDX content
|
| 657 |
+
* @returns {string} - Content with cleaned math blocks
|
| 658 |
+
*/
|
| 659 |
+
function cleanSingleLineMathNewlines(content) {
|
| 660 |
+
console.log(' 🔢 Cleaning newlines in single-dollar math blocks ($...$)...');
|
| 661 |
+
|
| 662 |
+
let cleanedCount = 0;
|
| 663 |
+
|
| 664 |
+
// ULTRA STRICT: Only target single dollar blocks ($...$) that contain newlines
|
| 665 |
+
// Use dotall flag (s) to match newlines with .*, and ensure we don't match $$
|
| 666 |
+
const cleanedContent = content.replace(/\$(?!\$)([\s\S]*?)\$(?!\$)/g, (match, mathContent) => {
|
| 667 |
+
// Only process if the content contains newlines
|
| 668 |
+
if (mathContent.includes('\n')) {
|
| 669 |
+
cleanedCount++;
|
| 670 |
+
|
| 671 |
+
// Remove ALL newlines and carriage returns, normalize whitespace
|
| 672 |
+
const cleanedMath = mathContent
|
| 673 |
+
.replace(/\n+/g, ' ') // Replace all newlines with spaces
|
| 674 |
+
.replace(/\r+/g, ' ') // Replace carriage returns with spaces
|
| 675 |
+
.replace(/\s+/g, ' ') // Normalize multiple spaces to single
|
| 676 |
+
.trim(); // Remove leading/trailing spaces
|
| 677 |
+
|
| 678 |
+
return `$${cleanedMath}$`;
|
| 679 |
+
}
|
| 680 |
+
return match; // Keep original if no newlines
|
| 681 |
+
});
|
| 682 |
+
|
| 683 |
+
if (cleanedCount > 0) {
|
| 684 |
+
console.log(` ✅ Cleaned ${cleanedCount} single-dollar math block(s) with newlines`);
|
| 685 |
+
}
|
| 686 |
+
|
| 687 |
+
return cleanedContent;
|
| 688 |
+
}
|
| 689 |
+
|
| 690 |
+
/**
|
| 691 |
+
* Add proper line breaks around display math blocks ($$...$$)
|
| 692 |
+
* @param {string} content - MDX content
|
| 693 |
+
* @returns {string} - Content with properly spaced display math
|
| 694 |
+
*/
|
| 695 |
+
function formatDisplayMathBlocks(content) {
|
| 696 |
+
console.log(' 📐 Formatting display math blocks with proper spacing...');
|
| 697 |
+
|
| 698 |
+
let formattedCount = 0;
|
| 699 |
+
|
| 700 |
+
// Find all $$...$$$ blocks (display math) and ensure proper line breaks
|
| 701 |
+
// Very strict: only matches exactly $$ followed by content followed by $$
|
| 702 |
+
const formattedContent = content.replace(/\$\$([\s\S]*?)\$\$/g, (match, mathContent) => {
|
| 703 |
+
formattedCount++;
|
| 704 |
+
|
| 705 |
+
// Clean up the math content - trim whitespace but preserve structure
|
| 706 |
+
const cleanedMath = mathContent.trim();
|
| 707 |
+
|
| 708 |
+
// Return with proper line breaks before and after
|
| 709 |
+
return `\n$$\n${cleanedMath}\n$$\n`;
|
| 710 |
+
});
|
| 711 |
+
|
| 712 |
+
if (formattedCount > 0) {
|
| 713 |
+
console.log(` ✅ Formatted ${formattedCount} display math block(s) with proper spacing`);
|
| 714 |
+
}
|
| 715 |
+
|
| 716 |
+
return formattedContent;
|
| 717 |
+
}
|
| 718 |
+
|
| 719 |
+
/**
|
| 720 |
+
* Clean newlines from figcaption content
|
| 721 |
+
* @param {string} content - MDX content
|
| 722 |
+
* @returns {string} - Content with cleaned figcaptions
|
| 723 |
+
*/
|
| 724 |
+
function cleanFigcaptionNewlines(content) {
|
| 725 |
+
console.log(' 📝 Cleaning newlines in figcaption elements...');
|
| 726 |
+
|
| 727 |
+
let cleanedCount = 0;
|
| 728 |
+
|
| 729 |
+
// Find all <figcaption>...</figcaption> blocks and remove internal newlines
|
| 730 |
+
const cleanedContent = content.replace(/<figcaption([^>]*)>([\s\S]*?)<\/figcaption>/g, (match, attributes, captionContent) => {
|
| 731 |
+
// Only process if the content contains newlines
|
| 732 |
+
if (captionContent.includes('\n')) {
|
| 733 |
+
cleanedCount++;
|
| 734 |
+
|
| 735 |
+
// Remove newlines and normalize whitespace
|
| 736 |
+
const cleanedCaption = captionContent
|
| 737 |
+
.replace(/\n+/g, ' ') // Replace newlines with spaces
|
| 738 |
+
.replace(/\s+/g, ' ') // Normalize multiple spaces
|
| 739 |
+
.trim(); // Trim whitespace
|
| 740 |
+
|
| 741 |
+
return `<figcaption${attributes}>${cleanedCaption}</figcaption>`;
|
| 742 |
+
}
|
| 743 |
+
|
| 744 |
+
return match; // Return unchanged if no newlines
|
| 745 |
+
});
|
| 746 |
+
|
| 747 |
+
if (cleanedCount > 0) {
|
| 748 |
+
console.log(` ✅ Cleaned ${cleanedCount} figcaption element(s)`);
|
| 749 |
+
} else {
|
| 750 |
+
console.log(` ℹ️ No figcaption elements with newlines found`);
|
| 751 |
+
}
|
| 752 |
+
|
| 753 |
+
return cleanedContent;
|
| 754 |
+
}
|
| 755 |
+
|
| 756 |
+
/**
|
| 757 |
+
* Remove HTML comments from MDX content
|
| 758 |
+
* @param {string} content - MDX content
|
| 759 |
+
* @returns {string} - Content without HTML comments
|
| 760 |
+
*/
|
| 761 |
+
function removeHtmlComments(content) {
|
| 762 |
+
console.log(' 🗑️ Removing HTML comments...');
|
| 763 |
+
|
| 764 |
+
let removedCount = 0;
|
| 765 |
+
|
| 766 |
+
// Remove all HTML comments <!-- ... -->
|
| 767 |
+
const cleanedContent = content.replace(/<!--[\s\S]*?-->/g, () => {
|
| 768 |
+
removedCount++;
|
| 769 |
+
return '';
|
| 770 |
+
});
|
| 771 |
+
|
| 772 |
+
if (removedCount > 0) {
|
| 773 |
+
console.log(` ✅ Removed ${removedCount} HTML comment(s)`);
|
| 774 |
+
}
|
| 775 |
+
|
| 776 |
+
return cleanedContent;
|
| 777 |
+
}
|
| 778 |
+
|
| 779 |
+
/**
|
| 780 |
+
* Clean up MDX-incompatible syntax
|
| 781 |
+
* @param {string} content - MDX content
|
| 782 |
+
* @returns {string} - Cleaned content
|
| 783 |
+
*/
|
| 784 |
+
function cleanMdxSyntax(content) {
|
| 785 |
+
console.log(' 🧹 Cleaning MDX syntax...');
|
| 786 |
+
|
| 787 |
+
return content
|
| 788 |
+
// NOTE: Math delimiter fixing is now handled by fixMixedMathDelimiters()
|
| 789 |
+
// Ensure proper spacing around JSX-like constructs
|
| 790 |
+
.replace(/>\s*</g, '>\n<')
|
| 791 |
+
// Remove problematic heading attributes - be more specific to avoid matching \begin{align}
|
| 792 |
+
.replace(/^(#{1,6}\s+[^{#\n]+)\{[^}]+\}$/gm, '$1')
|
| 793 |
+
// Fix escaped quotes in text
|
| 794 |
+
.replace(/\\("|')/g, '$1');
|
| 795 |
+
}
|
| 796 |
+
|
| 797 |
+
/**
|
| 798 |
+
* Main MDX processing function that applies all transformations
|
| 799 |
+
* @param {string} content - Raw Markdown content
|
| 800 |
+
* @param {string} latexContent - Original LaTeX content for metadata extraction
|
| 801 |
+
* @returns {string} - Processed MDX content compatible with Astro
|
| 802 |
+
*/
|
| 803 |
+
function processMdxContent(content, latexContent = '') {
|
| 804 |
+
console.log('🔧 Processing for Astro MDX compatibility...');
|
| 805 |
+
|
| 806 |
+
// Clear previous tracking
|
| 807 |
+
usedComponents.clear();
|
| 808 |
+
imageImports.clear();
|
| 809 |
+
|
| 810 |
+
let processedContent = content;
|
| 811 |
+
|
| 812 |
+
// Apply each transformation step sequentially
|
| 813 |
+
processedContent = ensureFrontmatter(processedContent, latexContent);
|
| 814 |
+
processedContent = fixMixedMathDelimiters(processedContent);
|
| 815 |
+
|
| 816 |
+
// Debug: check for $$ blocks after fixMixedMathDelimiters
|
| 817 |
+
const mathBlocksAfterMixed = (processedContent.match(/\$\$([\s\S]*?)\$\$/g) || []).length;
|
| 818 |
+
console.log(` 📊 Math blocks after mixed delimiters fix: ${mathBlocksAfterMixed}`);
|
| 819 |
+
|
| 820 |
+
processedContent = cleanOrphanedMathDelimiters(processedContent);
|
| 821 |
+
processedContent = cleanSingleLineMathNewlines(processedContent);
|
| 822 |
+
processedContent = formatDisplayMathBlocks(processedContent);
|
| 823 |
+
processedContent = removeHtmlComments(processedContent);
|
| 824 |
+
processedContent = cleanMdxSyntax(processedContent);
|
| 825 |
+
processedContent = convertSubfiguresToMultiFigure(processedContent);
|
| 826 |
+
processedContent = transformImages(processedContent);
|
| 827 |
+
processedContent = transformStyledSpans(processedContent);
|
| 828 |
+
processedContent = transformReferenceLinks(processedContent);
|
| 829 |
+
processedContent = fixHtmlEscaping(processedContent);
|
| 830 |
+
processedContent = cleanHighlightNumbering(processedContent);
|
| 831 |
+
processedContent = cleanFigcaptionNewlines(processedContent);
|
| 832 |
+
|
| 833 |
+
// Add component imports at the end
|
| 834 |
+
processedContent = addComponentImports(processedContent);
|
| 835 |
+
|
| 836 |
+
return processedContent;
|
| 837 |
+
}
|
| 838 |
+
|
| 839 |
+
function convertToMdx(inputFile, outputFile) {
|
| 840 |
+
console.log('📝 Modular Markdown to Astro MDX Converter');
|
| 841 |
+
console.log(`📁 Input: ${inputFile}`);
|
| 842 |
+
console.log(`📁 Output: ${outputFile}`);
|
| 843 |
+
|
| 844 |
+
// Check if input file exists
|
| 845 |
+
if (!existsSync(inputFile)) {
|
| 846 |
+
console.error(`❌ Input file not found: ${inputFile}`);
|
| 847 |
+
process.exit(1);
|
| 848 |
+
}
|
| 849 |
+
|
| 850 |
+
try {
|
| 851 |
+
console.log('🔄 Reading Markdown file...');
|
| 852 |
+
const markdownContent = readFileSync(inputFile, 'utf8');
|
| 853 |
+
|
| 854 |
+
// Try to read original LaTeX file for metadata extraction
|
| 855 |
+
let latexContent = '';
|
| 856 |
+
try {
|
| 857 |
+
const inputDir = dirname(inputFile);
|
| 858 |
+
const latexFile = join(inputDir, '..', 'input', 'main.tex');
|
| 859 |
+
if (existsSync(latexFile)) {
|
| 860 |
+
latexContent = readFileSync(latexFile, 'utf8');
|
| 861 |
+
}
|
| 862 |
+
} catch (error) {
|
| 863 |
+
// Ignore LaTeX reading errors - we'll use fallback frontmatter
|
| 864 |
+
}
|
| 865 |
+
|
| 866 |
+
// Apply modular MDX processing
|
| 867 |
+
const mdxContent = processMdxContent(markdownContent, latexContent);
|
| 868 |
+
|
| 869 |
+
console.log('💾 Writing MDX file...');
|
| 870 |
+
writeFileSync(outputFile, mdxContent);
|
| 871 |
+
|
| 872 |
+
console.log(`✅ Conversion completed: ${outputFile}`);
|
| 873 |
+
|
| 874 |
+
// Show file size
|
| 875 |
+
const inputSize = Math.round(markdownContent.length / 1024);
|
| 876 |
+
const outputSize = Math.round(mdxContent.length / 1024);
|
| 877 |
+
console.log(`📊 Input: ${inputSize}KB → Output: ${outputSize}KB`);
|
| 878 |
+
|
| 879 |
+
} catch (error) {
|
| 880 |
+
console.error('❌ Conversion failed:');
|
| 881 |
+
console.error(error.message);
|
| 882 |
+
process.exit(1);
|
| 883 |
+
}
|
| 884 |
+
}
|
| 885 |
+
|
| 886 |
+
export { convertToMdx };
|
| 887 |
+
|
| 888 |
+
function main() {
|
| 889 |
+
const config = parseArgs();
|
| 890 |
+
convertToMdx(config.input, config.output);
|
| 891 |
+
console.log('🎉 MDX conversion completed!');
|
| 892 |
+
}
|
| 893 |
+
|
| 894 |
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
| 895 |
+
main();
|
| 896 |
+
}
|
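For context, the exported convertToMdx() helper can also be driven from another Node script rather than through the CLI entry point above. A minimal sketch, assuming the importer's default output layout (both paths below are placeholders):

```js
// Minimal usage sketch for convertToMdx(); the paths are placeholders and
// should point at the Markdown produced by the Pandoc step and the desired
// MDX destination.
import { convertToMdx } from './mdx-converter.mjs';

convertToMdx(
  './output/main.md',   // Markdown emitted by the LaTeX -> Markdown step
  './output/main.mdx'   // Astro-compatible MDX written here
);
```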
app/scripts/latex-importer/metadata-extractor.mjs
ADDED
|
@@ -0,0 +1,170 @@
|
| 1 |
+
/**
|
| 2 |
+
* LaTeX Metadata Extractor
|
| 3 |
+
* Extracts document metadata from LaTeX files for frontmatter generation
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
/**
|
| 7 |
+
* Extract metadata from LaTeX content
|
| 8 |
+
* @param {string} latexContent - Raw LaTeX content
|
| 9 |
+
* @returns {object} - Extracted metadata object
|
| 10 |
+
*/
|
| 11 |
+
export function extractLatexMetadata(latexContent) {
|
| 12 |
+
const metadata = {};
|
| 13 |
+
|
| 14 |
+
// Extract title
|
| 15 |
+
const titleMatch = latexContent.match(/\\title\s*\{\s*([^}]+)\s*\}/s);
|
| 16 |
+
if (titleMatch) {
|
| 17 |
+
metadata.title = titleMatch[1]
|
| 18 |
+
.replace(/\n/g, ' ')
|
| 19 |
+
.trim();
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
// Extract authors with their specific affiliations
|
| 23 |
+
const authors = [];
|
| 24 |
+
const authorMatches = latexContent.matchAll(/\\authorOne\[[^\]]*\]\{([^}]+)\}/g);
|
| 25 |
+
|
| 26 |
+
for (const match of authorMatches) {
|
| 27 |
+
const fullAuthorInfo = match[1];
|
| 28 |
+
|
| 29 |
+
// Determine affiliations based on macros present
|
| 30 |
+
const affiliations = [];
|
| 31 |
+
if (fullAuthorInfo.includes('\\ensps')) {
|
| 32 |
+
affiliations.push(1); // École Normale Supérieure
|
| 33 |
+
}
|
| 34 |
+
if (fullAuthorInfo.includes('\\hf')) {
|
| 35 |
+
affiliations.push(2); // Hugging Face
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
// Clean author name by removing macros
|
| 39 |
+
let authorName = fullAuthorInfo
|
| 40 |
+
.replace(/\\ensps/g, '') // Remove École macro
|
| 41 |
+
.replace(/\\hf/g, '') // Remove Hugging Face macro
|
| 42 |
+
.replace(/\s+/g, ' ') // Normalize whitespace
|
| 43 |
+
.trim();
|
| 44 |
+
|
| 45 |
+
// Skip empty authors or placeholder entries
|
| 46 |
+
if (authorName && authorName !== '...') {
|
| 47 |
+
authors.push({
|
| 48 |
+
name: authorName,
|
| 49 |
+
affiliations: affiliations.length > 0 ? affiliations : [2] // Default to HF if no macro
|
| 50 |
+
});
|
| 51 |
+
}
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
if (authors.length > 0) {
|
| 55 |
+
metadata.authors = authors;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
// Extract affiliations - create the two distinct affiliations
|
| 59 |
+
metadata.affiliations = [
|
| 60 |
+
{
|
| 61 |
+
name: "École Normale Supérieure Paris-Saclay"
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
name: "Hugging Face"
|
| 65 |
+
}
|
| 66 |
+
];
|
| 67 |
+
|
| 68 |
+
// Extract date if available (common LaTeX patterns)
|
| 69 |
+
const datePatterns = [
|
| 70 |
+
/\\date\s*\{([^}]+)\}/,
|
| 71 |
+
/\\newcommand\s*\{\\date\}\s*\{([^}]+)\}/,
|
| 72 |
+
];
|
| 73 |
+
|
| 74 |
+
for (const pattern of datePatterns) {
|
| 75 |
+
const dateMatch = latexContent.match(pattern);
|
| 76 |
+
if (dateMatch) {
|
| 77 |
+
metadata.published = dateMatch[1].trim();
|
| 78 |
+
break;
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
// Fallback to current date if no date found
|
| 83 |
+
if (!metadata.published) {
|
| 84 |
+
metadata.published = new Date().toLocaleDateString('en-US', {
|
| 85 |
+
year: 'numeric',
|
| 86 |
+
month: 'short',
|
| 87 |
+
day: '2-digit'
|
| 88 |
+
});
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
return metadata;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
/**
|
| 95 |
+
* Generate YAML frontmatter from metadata object
|
| 96 |
+
* @param {object} metadata - Metadata object
|
| 97 |
+
* @returns {string} - YAML frontmatter string
|
| 98 |
+
*/
|
| 99 |
+
export function generateFrontmatter(metadata) {
|
| 100 |
+
let frontmatter = '---\n';
|
| 101 |
+
|
| 102 |
+
// Title
|
| 103 |
+
if (metadata.title) {
|
| 104 |
+
frontmatter += `title: "${metadata.title}"\n`;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
// Authors
|
| 108 |
+
if (metadata.authors && metadata.authors.length > 0) {
|
| 109 |
+
frontmatter += 'authors:\n';
|
| 110 |
+
metadata.authors.forEach(author => {
|
| 111 |
+
frontmatter += ` - name: "${author.name}"\n`;
|
| 112 |
+
if (author.url) {
|
| 113 |
+
frontmatter += ` url: "${author.url}"\n`;
|
| 114 |
+
}
|
| 115 |
+
frontmatter += ` affiliations: [${author.affiliations.join(', ')}]\n`;
|
| 116 |
+
});
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
// Affiliations
|
| 120 |
+
if (metadata.affiliations && metadata.affiliations.length > 0) {
|
| 121 |
+
frontmatter += 'affiliations:\n';
|
| 122 |
+
metadata.affiliations.forEach((affiliation, index) => {
|
| 123 |
+
frontmatter += ` - name: "${affiliation.name}"\n`;
|
| 124 |
+
if (affiliation.url) {
|
| 125 |
+
frontmatter += ` url: "${affiliation.url}"\n`;
|
| 126 |
+
}
|
| 127 |
+
});
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
// Publication date
|
| 131 |
+
if (metadata.published) {
|
| 132 |
+
frontmatter += `published: "${metadata.published}"\n`;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
// Additional metadata
|
| 136 |
+
if (metadata.doi) {
|
| 137 |
+
frontmatter += `doi: "${metadata.doi}"\n`;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
if (metadata.description) {
|
| 141 |
+
frontmatter += `description: "${metadata.description}"\n`;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
if (metadata.licence) {
|
| 145 |
+
frontmatter += `licence: >\n ${metadata.licence}\n`;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
if (metadata.tags && metadata.tags.length > 0) {
|
| 149 |
+
frontmatter += 'tags:\n';
|
| 150 |
+
metadata.tags.forEach(tag => {
|
| 151 |
+
frontmatter += ` - ${tag}\n`;
|
| 152 |
+
});
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
// Default Astro configuration
|
| 156 |
+
frontmatter += 'tableOfContentsAutoCollapse: true\n';
|
| 157 |
+
frontmatter += '---\n\n';
|
| 158 |
+
|
| 159 |
+
return frontmatter;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
/**
|
| 163 |
+
* Extract and generate frontmatter from LaTeX content
|
| 164 |
+
* @param {string} latexContent - Raw LaTeX content
|
| 165 |
+
* @returns {string} - Complete YAML frontmatter
|
| 166 |
+
*/
|
| 167 |
+
export function extractAndGenerateFrontmatter(latexContent) {
|
| 168 |
+
const metadata = extractLatexMetadata(latexContent);
|
| 169 |
+
return generateFrontmatter(metadata);
|
| 170 |
+
}
|
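A minimal sketch of how the two exports fit together, using an illustrative LaTeX snippet (not the real paper source) that exercises the \title, \authorOne and \date patterns the extractor looks for:

```js
// Illustrative only: extract metadata from a small LaTeX string and print
// the generated YAML frontmatter.
import { extractLatexMetadata, generateFrontmatter } from './metadata-extractor.mjs';

const latex = String.raw`
\title{A Sample Title}
\authorOne[]{Jane Doe \hf}
\date{Jan 01, 2025}
`;

const metadata = extractLatexMetadata(latex);
console.log(generateFrontmatter(metadata));
// Expected shape: the title, one author mapped to affiliation 2 (Hugging Face),
// both affiliation entries, and the extracted publication date.
```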
app/scripts/latex-importer/package-lock.json
ADDED
|
Binary file (56.7 kB)
|
app/scripts/latex-importer/package.json
ADDED
|
Binary file (967 Bytes)
|
app/scripts/latex-importer/post-processor.mjs
ADDED
|
@@ -0,0 +1,439 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
import { readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
|
| 4 |
+
import { join, dirname } from 'path';
|
| 5 |
+
import { fileURLToPath } from 'url';
|
| 6 |
+
|
| 7 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 8 |
+
const __dirname = dirname(__filename);
|
| 9 |
+
|
| 10 |
+
/**
|
| 11 |
+
* Post-processor for cleaning Markdown content from LaTeX conversion
|
| 12 |
+
* Each function handles a specific type of cleanup for maintainability
|
| 13 |
+
*/
|
| 14 |
+
|
| 15 |
+
/**
|
| 16 |
+
* Remove TeX low-level grouping commands that break KaTeX
|
| 17 |
+
* @param {string} content - Markdown content
|
| 18 |
+
* @returns {string} - Cleaned content
|
| 19 |
+
*/
|
| 20 |
+
function removeTexGroupingCommands(content) {
|
| 21 |
+
console.log(' 🧹 Removing TeX grouping commands...');
|
| 22 |
+
|
| 23 |
+
return content
|
| 24 |
+
.replace(/\\mathopen\{\}\\mathclose\\bgroup/g, '')
|
| 25 |
+
.replace(/\\aftergroup\\egroup/g, '')
|
| 26 |
+
.replace(/\\bgroup/g, '')
|
| 27 |
+
.replace(/\\egroup/g, '');
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
/**
|
| 31 |
+
* Simplify LaTeX delimiter constructions
|
| 32 |
+
* @param {string} content - Markdown content
|
| 33 |
+
* @returns {string} - Cleaned content
|
| 34 |
+
*/
|
| 35 |
+
function simplifyLatexDelimiters(content) {
|
| 36 |
+
console.log(' 🔧 Simplifying LaTeX delimiters...');
|
| 37 |
+
|
| 38 |
+
return content
|
| 39 |
+
.replace(/\\left\[\s*/g, '[')
|
| 40 |
+
.replace(/\s*\\right\]/g, ']');
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
/**
|
| 44 |
+
* Remove orphaned LaTeX labels
|
| 45 |
+
* @param {string} content - Markdown content
|
| 46 |
+
* @returns {string} - Cleaned content
|
| 47 |
+
*/
|
| 48 |
+
function removeOrphanedLabels(content) {
|
| 49 |
+
console.log(' 🏷️ Removing orphaned labels...');
|
| 50 |
+
|
| 51 |
+
return content
|
| 52 |
+
.replace(/^\s*\\label\{[^}]+\}\s*$/gm, '')
|
| 53 |
+
.replace(/\\label\{[^}]+\}/g, '');
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/**
|
| 57 |
+
* Fix KaTeX-incompatible math commands
|
| 58 |
+
* @param {string} content - Markdown content
|
| 59 |
+
* @returns {string} - Cleaned content
|
| 60 |
+
*/
|
| 61 |
+
function fixMathCommands(content) {
|
| 62 |
+
console.log(' 📐 Fixing KaTeX-incompatible math commands...');
|
| 63 |
+
|
| 64 |
+
return content
|
| 65 |
+
// Replace \hdots with \ldots (KaTeX compatible)
|
| 66 |
+
.replace(/\\hdots/g, '\\ldots')
|
| 67 |
+
// Add more math command fixes here as needed
|
| 68 |
+
.replace(/\\vdots/g, '\\vdots'); // This one should be fine, but kept for consistency
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
/**
|
| 72 |
+
* Convert LaTeX matrix commands to KaTeX-compatible environments
|
| 73 |
+
* @param {string} content - Markdown content
|
| 74 |
+
* @returns {string} - Content with fixed matrix commands
|
| 75 |
+
*/
|
| 76 |
+
function fixMatrixCommands(content) {
|
| 77 |
+
console.log(' 🔢 Converting matrix commands to KaTeX format...');
|
| 78 |
+
|
| 79 |
+
let fixedCount = 0;
|
| 80 |
+
|
| 81 |
+
// Convert \pmatrix{...} to \begin{pmatrix}...\end{pmatrix}
|
| 82 |
+
content = content.replace(/\\pmatrix\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, matrixContent) => {
|
| 83 |
+
fixedCount++;
|
| 84 |
+
// Split by \\ for rows, handle nested braces
|
| 85 |
+
const rows = matrixContent.split('\\\\').map(row => row.trim()).filter(row => row);
|
| 86 |
+
return `\\begin{pmatrix}\n${rows.join(' \\\\\n')}\n\\end{pmatrix}`;
|
| 87 |
+
});
|
| 88 |
+
|
| 89 |
+
// Convert \bmatrix{...} to \begin{bmatrix}...\end{bmatrix}
|
| 90 |
+
content = content.replace(/\\bmatrix\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, matrixContent) => {
|
| 91 |
+
fixedCount++;
|
| 92 |
+
const rows = matrixContent.split('\\\\').map(row => row.trim()).filter(row => row);
|
| 93 |
+
return `\\begin{bmatrix}\n${rows.join(' \\\\\n')}\n\\end{bmatrix}`;
|
| 94 |
+
});
|
| 95 |
+
|
| 96 |
+
// Convert \vmatrix{...} to \begin{vmatrix}...\end{vmatrix}
|
| 97 |
+
content = content.replace(/\\vmatrix\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, matrixContent) => {
|
| 98 |
+
fixedCount++;
|
| 99 |
+
const rows = matrixContent.split('\\\\').map(row => row.trim()).filter(row => row);
|
| 100 |
+
return `\\begin{vmatrix}\n${rows.join(' \\\\\n')}\n\\end{vmatrix}`;
|
| 101 |
+
});
|
| 102 |
+
|
| 103 |
+
if (fixedCount > 0) {
|
| 104 |
+
console.log(` ✅ Fixed ${fixedCount} matrix command(s)`);
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
return content;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
/**
|
| 111 |
+
* Fix Unicode characters that break MDX/JSX parsing
|
| 112 |
+
* @param {string} content - Markdown content
|
| 113 |
+
* @returns {string} - Cleaned content
|
| 114 |
+
*/
|
| 115 |
+
function fixUnicodeIssues(content) {
|
| 116 |
+
console.log(' 🌐 Fixing Unicode characters for MDX compatibility...');
|
| 117 |
+
|
| 118 |
+
return content
|
| 119 |
+
// Replace Unicode middle dot (·) with \cdot in math expressions
|
| 120 |
+
.replace(/\$([^$]*?)·([^$]*?)\$/g, (match, before, after) => {
|
| 121 |
+
return `$${before}\\cdot${after}$`;
|
| 122 |
+
})
|
| 123 |
+
// Replace Unicode middle dot in display math
|
| 124 |
+
.replace(/\$\$([^$]*?)·([^$]*?)\$\$/g, (match, before, after) => {
|
| 125 |
+
return `$$${before}\\cdot${after}$$`;
|
| 126 |
+
})
|
| 127 |
+
// Replace other problematic Unicode characters
|
| 128 |
+
.replace(/[""]/g, '"') // Smart quotes to regular quotes
|
| 129 |
+
.replace(/['']/g, "'") // Smart apostrophes to regular apostrophes
|
| 130 |
+
.replace(/…/g, '...') // Ellipsis to three dots
|
| 131 |
+
.replace(/–/g, '-') // En dash to hyphen
|
| 132 |
+
.replace(/—/g, '--'); // Em dash to double hyphen
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/**
|
| 136 |
+
* Fix multiline math expressions for MDX compatibility
|
| 137 |
+
* @param {string} content - Markdown content
|
| 138 |
+
* @returns {string} - Cleaned content
|
| 139 |
+
*/
|
| 140 |
+
function fixMultilineMath(content) {
|
| 141 |
+
console.log(' 📏 Fixing multiline math expressions for MDX...');
|
| 142 |
+
|
| 143 |
+
return content
|
| 144 |
+
// Convert multiline inline math to display math blocks (more precise regex)
|
| 145 |
+
// Only match if the content is a self-contained math expression within a single line
|
| 146 |
+
.replace(/\$([^$\n]*\\\\[^$\n]*)\$/g, (match, mathContent) => {
|
| 147 |
+
// Only convert if it contains actual math operators and line breaks
|
| 148 |
+
if (mathContent.includes('\\\\') && /[=+\-*/^_{}]/.test(mathContent)) {
|
| 149 |
+
// Remove leading/trailing whitespace and normalize newlines
|
| 150 |
+
const cleanedMath = mathContent
|
| 151 |
+
.replace(/^\s+|\s+$/g, '')
|
| 152 |
+
.replace(/\s*\\\\\s*/g, '\\\\\n ');
|
| 153 |
+
return `$$\n${cleanedMath}\n$$`;
|
| 154 |
+
}
|
| 155 |
+
return match; // Keep original if it doesn't look like multiline math
|
| 156 |
+
})
|
| 157 |
+
// Ensure display math blocks are properly separated
|
| 158 |
+
.replace(/\$\$\s*\n\s*([^$]+?)\s*\n\s*\$\$/g, (match, mathContent) => {
|
| 159 |
+
return `\n$$\n${mathContent.trim()}\n$$\n`;
|
| 160 |
+
});
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
/**
|
| 164 |
+
* Inject code snippets into empty code blocks
|
| 165 |
+
* @param {string} content - Markdown content
|
| 166 |
+
* @param {string} inputDir - Directory containing the LaTeX source and snippets
|
| 167 |
+
* @returns {string} - Content with injected code snippets
|
| 168 |
+
*/
|
| 169 |
+
function injectCodeSnippets(content, inputDir = null) {
|
| 170 |
+
console.log(' 💻 Injecting code snippets...');
|
| 171 |
+
|
| 172 |
+
if (!inputDir) {
|
| 173 |
+
console.log(' ⚠️ No input directory provided, skipping code injection');
|
| 174 |
+
return content;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
const snippetsDir = join(inputDir, 'snippets');
|
| 178 |
+
|
| 179 |
+
if (!existsSync(snippetsDir)) {
|
| 180 |
+
console.log(' ⚠️ Snippets directory not found, skipping code injection');
|
| 181 |
+
return content;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
// Get all available snippet files
|
| 185 |
+
let availableSnippets = [];
|
| 186 |
+
try {
|
| 187 |
+
availableSnippets = readdirSync(snippetsDir);
|
| 188 |
+
console.log(` 📁 Found ${availableSnippets.length} snippet file(s): ${availableSnippets.join(', ')}`);
|
| 189 |
+
} catch (error) {
|
| 190 |
+
console.log(` ❌ Error reading snippets directory: ${error.message}`);
|
| 191 |
+
return content;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
// Find all empty code blocks
|
| 195 |
+
const emptyCodeBlockPattern = /```\s*(\w+)\s*\n\s*```/g;
|
| 196 |
+
|
| 197 |
+
let processedContent = content;
|
| 198 |
+
let injectionCount = 0;
|
| 199 |
+
|
| 200 |
+
processedContent = processedContent.replace(emptyCodeBlockPattern, (match, language) => {
|
| 201 |
+
// Map language names to file extensions
|
| 202 |
+
const extensionMap = {
|
| 203 |
+
'python': 'py',
|
| 204 |
+
'javascript': 'js',
|
| 205 |
+
'typescript': 'ts',
|
| 206 |
+
'bash': 'sh',
|
| 207 |
+
'shell': 'sh'
|
| 208 |
+
};
|
| 209 |
+
|
| 210 |
+
const fileExtension = extensionMap[language] || language;
|
| 211 |
+
|
| 212 |
+
// Try to find a matching snippet file for this language
|
| 213 |
+
const matchingFiles = availableSnippets.filter(file =>
|
| 214 |
+
file.endsWith(`.${fileExtension}`)
|
| 215 |
+
);
|
| 216 |
+
|
| 217 |
+
if (matchingFiles.length === 0) {
|
| 218 |
+
console.log(` ⚠️ No ${language} snippet found (looking for .${fileExtension})`);
|
| 219 |
+
return match;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
// Use the first matching file (could be made smarter with context analysis)
|
| 223 |
+
const selectedFile = matchingFiles[0];
|
| 224 |
+
const snippetPath = join(snippetsDir, selectedFile);
|
| 225 |
+
|
| 226 |
+
try {
|
| 227 |
+
const snippetContent = readFileSync(snippetPath, 'utf8');
|
| 228 |
+
injectionCount++;
|
| 229 |
+
console.log(` ✅ Injected: ${selectedFile}`);
|
| 230 |
+
return `\`\`\`${language}\n${snippetContent.trim()}\n\`\`\``;
|
| 231 |
+
} catch (error) {
|
| 232 |
+
console.log(` ❌ Error reading ${selectedFile}: ${error.message}`);
|
| 233 |
+
return match;
|
| 234 |
+
}
|
| 235 |
+
});
|
| 236 |
+
|
| 237 |
+
if (injectionCount > 0) {
|
| 238 |
+
console.log(` 📊 Injected ${injectionCount} code snippet(s)`);
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
return processedContent;
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
/**
|
| 245 |
+
* Fix all attributes that still contain colons (href, data-reference, id)
|
| 246 |
+
* @param {string} content - Markdown content
|
| 247 |
+
* @returns {string} - Cleaned content
|
| 248 |
+
*/
|
| 249 |
+
function fixAllAttributes(content) {
|
| 250 |
+
console.log(' 🔗 Fixing all attributes with colons...');
|
| 251 |
+
|
| 252 |
+
let fixedCount = 0;
|
| 253 |
+
|
| 254 |
+
// Fix href attributes containing colons
|
| 255 |
+
content = content.replace(/href="([^"]*):([^"]*)"/g, (match, before, after) => {
|
| 256 |
+
fixedCount++;
|
| 257 |
+
return `href="${before}-${after}"`;
|
| 258 |
+
});
|
| 259 |
+
|
| 260 |
+
// Fix data-reference attributes containing colons
|
| 261 |
+
content = content.replace(/data-reference="([^"]*):([^"]*)"/g, (match, before, after) => {
|
| 262 |
+
fixedCount++;
|
| 263 |
+
return `data-reference="${before}-${after}"`;
|
| 264 |
+
});
|
| 265 |
+
|
| 266 |
+
// Fix id attributes containing colons (like in Figure components)
|
| 267 |
+
content = content.replace(/id="([^"]*):([^"]*)"/g, (match, before, after) => {
|
| 268 |
+
fixedCount++;
|
| 269 |
+
return `id="${before}-${after}"`;
|
| 270 |
+
});
|
| 271 |
+
|
| 272 |
+
if (fixedCount > 0) {
|
| 273 |
+
console.log(` ✅ Fixed ${fixedCount} attribute(s) with colons`);
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
return content;
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
/**
|
| 280 |
+
* Fix link text content that still contains colons
|
| 281 |
+
* @param {string} content - Markdown content
|
| 282 |
+
* @returns {string} - Cleaned content
|
| 283 |
+
*/
|
| 284 |
+
function fixLinkTextContent(content) {
|
| 285 |
+
console.log(' 📝 Fixing link text content with colons...');
|
| 286 |
+
|
| 287 |
+
let fixedCount = 0;
|
| 288 |
+
|
| 289 |
+
// Fix text content within links that contain references with colons
|
| 290 |
+
// Pattern: <a ...>[text:content]</a>
|
| 291 |
+
const cleanedContent = content.replace(/<a([^>]*)>\[([^:]*):([^\]]*)\]<\/a>/g, (match, attributes, before, after) => {
|
| 292 |
+
fixedCount++;
|
| 293 |
+
return `<a${attributes}>[${before}-${after}]</a>`;
|
| 294 |
+
});
|
| 295 |
+
|
| 296 |
+
if (fixedCount > 0) {
|
| 297 |
+
console.log(` ✅ Fixed ${fixedCount} link text(s) with colons`);
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
return cleanedContent;
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
/**
|
| 304 |
+
* Convert align anchor markers to proper HTML spans outside math blocks
|
| 305 |
+
* @param {string} content - Markdown content
|
| 306 |
+
* @returns {string} - Content with converted anchor spans
|
| 307 |
+
*/
|
| 308 |
+
function convertAlignAnchors(content) {
|
| 309 |
+
console.log(' 🏷️ Converting align anchor markers to HTML spans...');
|
| 310 |
+
|
| 311 |
+
let convertedCount = 0;
|
| 312 |
+
|
| 313 |
+
// Find and replace align anchor markers with proper spans outside math blocks
|
| 314 |
+
content = content.replace(/``` math\n%%ALIGN_ANCHOR_ID\{([^}]+)\}%%\n([\s\S]*?)\n```/g, (match, anchorId, mathContent) => {
|
| 315 |
+
convertedCount++;
|
| 316 |
+
return `<span id="${anchorId}" style="position: absolute;"></span>\n\n\`\`\` math\n${mathContent}\n\`\`\``;
|
| 317 |
+
});
|
| 318 |
+
|
| 319 |
+
if (convertedCount > 0) {
|
| 320 |
+
console.log(` ✅ Converted ${convertedCount} align anchor marker(s) to spans`);
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
return content;
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
/**
|
| 327 |
+
* Main post-processing function that applies all cleanup steps
|
| 328 |
+
* @param {string} content - Raw Markdown content from Pandoc
|
| 329 |
+
* @param {string} inputDir - Optional: Directory containing LaTeX source for code injection
|
| 330 |
+
* @returns {string} - Cleaned Markdown content
|
| 331 |
+
*/
|
| 332 |
+
export function postProcessMarkdown(content, inputDir = null) {
|
| 333 |
+
console.log('🔧 Post-processing for KaTeX compatibility...');
|
| 334 |
+
|
| 335 |
+
let processedContent = content;
|
| 336 |
+
|
| 337 |
+
// Apply each cleanup step sequentially
|
| 338 |
+
processedContent = removeTexGroupingCommands(processedContent);
|
| 339 |
+
processedContent = simplifyLatexDelimiters(processedContent);
|
| 340 |
+
processedContent = removeOrphanedLabels(processedContent);
|
| 341 |
+
processedContent = convertAlignAnchors(processedContent);
|
| 342 |
+
processedContent = fixMathCommands(processedContent);
|
| 343 |
+
processedContent = fixMatrixCommands(processedContent);
|
| 344 |
+
processedContent = fixUnicodeIssues(processedContent);
|
| 345 |
+
processedContent = fixMultilineMath(processedContent);
|
| 346 |
+
processedContent = fixAllAttributes(processedContent);
|
| 347 |
+
processedContent = fixLinkTextContent(processedContent);
|
| 348 |
+
|
| 349 |
+
// Inject code snippets if input directory is provided
|
| 350 |
+
if (inputDir) {
|
| 351 |
+
processedContent = injectCodeSnippets(processedContent, inputDir);
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
return processedContent;
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
/**
|
| 358 |
+
* CLI interface for standalone usage
|
| 359 |
+
*/
|
| 360 |
+
function parseArgs() {
|
| 361 |
+
const args = process.argv.slice(2);
|
| 362 |
+
const config = {
|
| 363 |
+
input: join(__dirname, 'output', 'main.md'),
|
| 364 |
+
output: null, // Will default to input if not specified
|
| 365 |
+
verbose: false,
|
| 366 |
+
};
|
| 367 |
+
|
| 368 |
+
for (const arg of args) {
|
| 369 |
+
if (arg.startsWith('--input=')) {
|
| 370 |
+
config.input = arg.substring('--input='.length);
|
| 371 |
+
} else if (arg.startsWith('--output=')) {
|
| 372 |
+
config.output = arg.substring('--output='.length);
|
| 373 |
+
} else if (arg === '--verbose') {
|
| 374 |
+
config.verbose = true;
|
| 375 |
+
} else if (arg === '--help' || arg === '-h') {
|
| 376 |
+
console.log(`
|
| 377 |
+
🔧 Markdown Post-Processor
|
| 378 |
+
|
| 379 |
+
Usage:
|
| 380 |
+
node post-processor.mjs [options]
|
| 381 |
+
|
| 382 |
+
Options:
|
| 383 |
+
--input=PATH Input Markdown file (default: output/main.md)
|
| 384 |
+
--output=PATH Output file (default: overwrites input)
|
| 385 |
+
--verbose Verbose output
|
| 386 |
+
--help, -h Show this help
|
| 387 |
+
|
| 388 |
+
Examples:
|
| 389 |
+
# Process main.md in-place
|
| 390 |
+
node post-processor.mjs
|
| 391 |
+
|
| 392 |
+
# Process with custom paths
|
| 393 |
+
node post-processor.mjs --input=raw.md --output=clean.md
|
| 394 |
+
`);
|
| 395 |
+
process.exit(0);
|
| 396 |
+
}
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
// Default output to input if not specified
|
| 400 |
+
if (!config.output) {
|
| 401 |
+
config.output = config.input;
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
return config;
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
function main() {
|
| 408 |
+
const config = parseArgs();
|
| 409 |
+
|
| 410 |
+
console.log('🔧 Markdown Post-Processor');
|
| 411 |
+
console.log(`📁 Input: ${config.input}`);
|
| 412 |
+
console.log(`📁 Output: ${config.output}`);
|
| 413 |
+
|
| 414 |
+
try {
|
| 415 |
+
const content = readFileSync(config.input, 'utf8');
|
| 416 |
+
const processedContent = postProcessMarkdown(content);
|
| 417 |
+
|
| 418 |
+
writeFileSync(config.output, processedContent);
|
| 419 |
+
|
| 420 |
+
console.log(`✅ Post-processing completed: ${config.output}`);
|
| 421 |
+
|
| 422 |
+
// Show stats if verbose
|
| 423 |
+
if (config.verbose) {
|
| 424 |
+
const originalLines = content.split('\n').length;
|
| 425 |
+
const processedLines = processedContent.split('\n').length;
|
| 426 |
+
console.log(`📊 Lines: ${originalLines} → ${processedLines}`);
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
} catch (error) {
|
| 430 |
+
console.error('❌ Post-processing failed:');
|
| 431 |
+
console.error(error.message);
|
| 432 |
+
process.exit(1);
|
| 433 |
+
}
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
// Run CLI if called directly
|
| 437 |
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
| 438 |
+
main();
|
| 439 |
+
}
|
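A minimal sketch of calling the exported postProcessMarkdown() on an in-memory string instead of a file; the sample content is illustrative only:

```js
// Illustrative only: run the cleanup pipeline on a small Markdown string.
import { postProcessMarkdown } from './post-processor.mjs';

const raw = [
  'Some text with a label \\label{sec:intro} and a dash – here.',
  '',
  'Inline matrix: $\\pmatrix{1 & 2 \\\\ 3 & 4}$',
].join('\n');

const cleaned = postProcessMarkdown(raw);
console.log(cleaned);
// The \label{...} is stripped, the en dash becomes "-", and \pmatrix{...}
// is rewritten as a \begin{pmatrix}...\end{pmatrix} environment.
```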
app/scripts/latex-importer/reference-preprocessor.mjs
ADDED
|
@@ -0,0 +1,239 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* LaTeX Reference Preprocessor
|
| 5 |
+
*
|
| 6 |
+
* This module cleans up LaTeX references BEFORE Pandoc conversion to ensure
|
| 7 |
+
* consistent, MDX-compatible identifiers throughout the document.
|
| 8 |
+
*
|
| 9 |
+
* What it does:
|
| 10 |
+
* - Removes prefixes from labels: \label{sec:intro} → \label{sec-intro}
|
| 11 |
+
* - Updates corresponding refs: \ref{sec:intro} → \ref{sec-intro}
|
| 12 |
+
* - Handles all reference types: sec:, fig:, eq:, table:, etc.
|
| 13 |
+
* - Maintains consistency between labels and references
|
| 14 |
+
*/
|
| 15 |
+
|
| 16 |
+
/**
|
| 17 |
+
* Extract all references from LaTeX content
|
| 18 |
+
* @param {string} content - LaTeX content
|
| 19 |
+
* @returns {Object} - Object with labels and refs arrays
|
| 20 |
+
*/
|
| 21 |
+
function extractReferences(content) {
|
| 22 |
+
const references = {
|
| 23 |
+
labels: new Set(),
|
| 24 |
+
refs: new Set(),
|
| 25 |
+
cites: new Set()
|
| 26 |
+
};
|
| 27 |
+
|
| 28 |
+
// Find all \label{...} commands
|
| 29 |
+
const labelMatches = content.matchAll(/\\label\{([^}]+)\}/g);
|
| 30 |
+
for (const match of labelMatches) {
|
| 31 |
+
references.labels.add(match[1]);
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// Find all \ref{...} commands
|
| 35 |
+
const refMatches = content.matchAll(/\\ref\{([^}]+)\}/g);
|
| 36 |
+
for (const match of refMatches) {
|
| 37 |
+
references.refs.add(match[1]);
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
// Find all \cite{...} commands (already handled in existing code but included for completeness)
|
| 41 |
+
const citeMatches = content.matchAll(/\\cite[tp]?\{([^}]+)\}/g);
|
| 42 |
+
for (const match of citeMatches) {
|
| 43 |
+
// Handle multiple citations: \cite{ref1,ref2,ref3}
|
| 44 |
+
const citations = match[1].split(',').map(cite => cite.trim());
|
| 45 |
+
citations.forEach(cite => references.cites.add(cite));
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
return references;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/**
|
| 52 |
+
* Create clean identifier mapping
|
| 53 |
+
* @param {Object} references - References object from extractReferences
|
| 54 |
+
* @returns {Map} - Mapping from original to clean identifiers
|
| 55 |
+
*/
|
| 56 |
+
function createCleanMapping(references) {
|
| 57 |
+
const mapping = new Map();
|
| 58 |
+
|
| 59 |
+
// Create mapping for all unique identifiers
|
| 60 |
+
const allIdentifiers = new Set([
|
| 61 |
+
...references.labels,
|
| 62 |
+
...references.refs
|
| 63 |
+
]);
|
| 64 |
+
|
| 65 |
+
for (const id of allIdentifiers) {
|
| 66 |
+
// Remove common prefixes and replace colons with dashes
|
| 67 |
+
let cleanId = id
|
| 68 |
+
.replace(/^(sec|section|ch|chapter|fig|figure|eq|equation|tab|table|lst|listing|app|appendix):/gi, '')
|
| 69 |
+
.replace(/:/g, '-')
|
| 70 |
+
.replace(/[^a-zA-Z0-9_-]/g, '-') // Replace any other problematic characters
|
| 71 |
+
.replace(/-+/g, '-') // Collapse multiple dashes
|
| 72 |
+
.replace(/^-|-$/g, ''); // Remove leading/trailing dashes
|
| 73 |
+
|
| 74 |
+
// Ensure we don't have empty identifiers
|
| 75 |
+
if (!cleanId) {
|
| 76 |
+
cleanId = id.replace(/:/g, '-');
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
mapping.set(id, cleanId);
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
return mapping;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/**
|
| 86 |
+
* Convert labels to HTML anchor spans for better MDX compatibility
|
| 87 |
+
* @param {string} content - LaTeX content
|
| 88 |
+
* @param {Map} mapping - Identifier mapping (original -> clean)
|
| 89 |
+
* @returns {Object} - Result with content and count of conversions
|
| 90 |
+
*/
|
| 91 |
+
function convertLabelsToAnchors(content, mapping) {
|
| 92 |
+
let processedContent = content;
|
| 93 |
+
let anchorsCreated = 0;
|
| 94 |
+
|
| 95 |
+
// Replace \label{...} with HTML anchor spans, but SKIP labels inside math environments
|
| 96 |
+
for (const [original, clean] of mapping) {
|
| 97 |
+
// Skip equation labels (they will be handled by the Lua filter)
|
| 98 |
+
if (original.startsWith('eq:')) {
|
| 99 |
+
continue;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
|
| 103 |
+
const labelMatches = processedContent.match(labelRegex);
|
| 104 |
+
|
| 105 |
+
if (labelMatches) {
|
| 106 |
+
// Replace \label{original} with HTML span anchor (invisible but accessible)
|
| 107 |
+
processedContent = processedContent.replace(labelRegex, `\n\n<span id="${clean}" style="position: absolute;"></span>\n\n`);
|
| 108 |
+
anchorsCreated += labelMatches.length;
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
return { content: processedContent, anchorsCreated };
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
/**
|
| 116 |
+
* Convert \highlight{...} commands to HTML spans with CSS class
|
| 117 |
+
* @param {string} content - LaTeX content
|
| 118 |
+
* @returns {Object} - Result with content and count of conversions
|
| 119 |
+
*/
|
| 120 |
+
function convertHighlightCommands(content) {
|
| 121 |
+
let processedContent = content;
|
| 122 |
+
let highlightsConverted = 0;
|
| 123 |
+
|
| 124 |
+
// Replace \highlight{...} with <span class="highlight">...</span>
|
| 125 |
+
processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
|
| 126 |
+
highlightsConverted++;
|
| 127 |
+
return `<span class="highlight">${text}</span>`;
|
| 128 |
+
});
|
| 129 |
+
|
| 130 |
+
return { content: processedContent, highlightsConverted };
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
/**
|
| 134 |
+
* Apply mapping to LaTeX content
|
| 135 |
+
* @param {string} content - Original LaTeX content
|
| 136 |
+
* @param {Map} mapping - Identifier mapping
|
| 137 |
+
* @returns {string} - Cleaned LaTeX content
|
| 138 |
+
*/
|
| 139 |
+
function applyMapping(content, mapping) {
|
| 140 |
+
let cleanedContent = content;
|
| 141 |
+
let changesCount = 0;
|
| 142 |
+
|
| 143 |
+
// First, convert labels to anchor spans
|
| 144 |
+
const anchorResult = convertLabelsToAnchors(cleanedContent, mapping);
|
| 145 |
+
cleanedContent = anchorResult.content;
|
| 146 |
+
const anchorsCreated = anchorResult.anchorsCreated;
|
| 147 |
+
|
| 148 |
+
// Convert \highlight{} commands to spans
|
| 149 |
+
const highlightResult = convertHighlightCommands(cleanedContent);
|
| 150 |
+
cleanedContent = highlightResult.content;
|
| 151 |
+
const highlightsConverted = highlightResult.highlightsConverted;
|
| 152 |
+
|
| 153 |
+
// Then apply mapping to remaining references and equation labels
|
| 154 |
+
for (const [original, clean] of mapping) {
|
| 155 |
+
if (original !== clean) {
|
| 156 |
+
// Replace \ref{original} with \ref{clean}
|
| 157 |
+
const refRegex = new RegExp(`\\\\ref\\{${escapeRegex(original)}\\}`, 'g');
|
| 158 |
+
const refMatches = cleanedContent.match(refRegex);
|
| 159 |
+
if (refMatches) {
|
| 160 |
+
cleanedContent = cleanedContent.replace(refRegex, `\\ref{${clean}}`);
|
| 161 |
+
changesCount += refMatches.length;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
// For equation labels, still clean the labels themselves (for the Lua filter)
|
| 165 |
+
if (original.startsWith('eq:')) {
|
| 166 |
+
const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
|
| 167 |
+
const labelMatches = cleanedContent.match(labelRegex);
|
| 168 |
+
if (labelMatches) {
|
| 169 |
+
cleanedContent = cleanedContent.replace(labelRegex, `\\label{${clean}}`);
|
| 170 |
+
changesCount += labelMatches.length;
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
return {
|
| 177 |
+
content: cleanedContent,
|
| 178 |
+
changesCount: changesCount + anchorsCreated,
|
| 179 |
+
highlightsConverted: highlightsConverted
|
| 180 |
+
};
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
/**
|
| 184 |
+
* Escape special regex characters
|
| 185 |
+
* @param {string} string - String to escape
|
| 186 |
+
* @returns {string} - Escaped string
|
| 187 |
+
*/
|
| 188 |
+
function escapeRegex(string) {
|
| 189 |
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
/**
|
| 193 |
+
* Main preprocessing function
|
| 194 |
+
* @param {string} latexContent - Original LaTeX content
|
| 195 |
+
* @returns {Object} - Result with cleaned content and statistics
|
| 196 |
+
*/
|
| 197 |
+
export function preprocessLatexReferences(latexContent) {
|
| 198 |
+
console.log('🔧 Preprocessing LaTeX references for MDX compatibility...');
|
| 199 |
+
|
| 200 |
+
// 1. Extract all references
|
| 201 |
+
const references = extractReferences(latexContent);
|
| 202 |
+
|
| 203 |
+
console.log(` 📊 Found: ${references.labels.size} labels, ${references.refs.size} refs`);
|
| 204 |
+
|
| 205 |
+
// 2. Create clean mapping
|
| 206 |
+
const mapping = createCleanMapping(references);
|
| 207 |
+
|
| 208 |
+
// 3. Apply mapping
|
| 209 |
+
const result = applyMapping(latexContent, mapping);
|
| 210 |
+
|
| 211 |
+
if (result.changesCount > 0) {
|
| 212 |
+
console.log(` ✅ Processed ${result.changesCount} reference(s) and created anchor spans`);
|
| 213 |
+
|
| 214 |
+
// Show some examples of changes
|
| 215 |
+
let exampleCount = 0;
|
| 216 |
+
for (const [original, clean] of mapping) {
|
| 217 |
+
if (original !== clean && exampleCount < 3) {
|
| 218 |
+
console.log(` ${original} → ${clean} (span + refs)`);
|
| 219 |
+
exampleCount++;
|
| 220 |
+
}
|
| 221 |
+
}
|
| 222 |
+
if (mapping.size > 3) {
|
| 223 |
+
console.log(` ... and ${mapping.size - 3} more anchor spans created`);
|
| 224 |
+
}
|
| 225 |
+
} else {
|
| 226 |
+
console.log(' ℹ️ No reference cleanup needed');
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
if (result.highlightsConverted > 0) {
|
| 230 |
+
console.log(` ✨ Converted ${result.highlightsConverted} \\highlight{} command(s) to <span class="highlight">`);
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
return {
|
| 234 |
+
content: result.content,
|
| 235 |
+
changesCount: result.changesCount,
|
| 236 |
+
mapping: mapping,
|
| 237 |
+
references: references
|
| 238 |
+
};
|
| 239 |
+
}
|
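A minimal sketch of preprocessLatexReferences() on an illustrative fragment, showing the label-to-anchor, ref-cleanup and \highlight{} conversions described in the header comment:

```js
// Illustrative only: preprocess a LaTeX fragment before Pandoc conversion.
import { preprocessLatexReferences } from './reference-preprocessor.mjs';

const latex = String.raw`
\section{Introduction}\label{sec:intro}
As shown in Section~\ref{sec:intro}, we \highlight{emphasise} this point.
`;

const { content, changesCount } = preprocessLatexReferences(latex);
console.log(content);
// \label{sec:intro} becomes an invisible <span id="intro"> anchor,
// \ref{sec:intro} becomes \ref{intro}, and \highlight{...} becomes
// <span class="highlight">...</span>.
console.log(`Changes applied: ${changesCount}`);
```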
app/scripts/notion-importer/.cursorignore
ADDED
|
@@ -0,0 +1 @@
|
| 1 |
+
.env
|
app/scripts/notion-importer/.notion-to-md/media/27877f1c-9c9d-804d-9c82-f7b3905578ff_media.json
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bd4ab2fce404409575347c4d1941d7934aa5022407e2c91098bb2c31a0faed9
|
| 3 |
+
size 36783
|
app/scripts/notion-importer/custom-code-renderer.mjs
ADDED
|
@@ -0,0 +1,33 @@
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* Custom Code Block Renderer for notion-to-md
|
| 5 |
+
* Fixes the issue where code blocks end with "text" instead of proper closing
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
export function createCustomCodeRenderer() {
|
| 9 |
+
return {
|
| 10 |
+
name: 'custom-code-renderer',
|
| 11 |
+
type: 'renderer',
|
| 12 |
+
|
| 13 |
+
/**
|
| 14 |
+
* Custom renderer for code blocks
|
| 15 |
+
* @param {Object} block - Notion code block
|
| 16 |
+
* @returns {string} - Properly formatted markdown code block
|
| 17 |
+
*/
|
| 18 |
+
code: (block) => {
|
| 19 |
+
const { language, rich_text } = block.code;
|
| 20 |
+
|
| 21 |
+
// Extract the actual code content from rich_text
|
| 22 |
+
const codeContent = rich_text
|
| 23 |
+
.map(text => text.plain_text)
|
| 24 |
+
.join('');
|
| 25 |
+
|
| 26 |
+
// Determine the language (default to empty string if not specified)
|
| 27 |
+
const lang = language || '';
|
| 28 |
+
|
| 29 |
+
// Return properly formatted markdown code block
|
| 30 |
+
return `\`\`\`${lang}\n${codeContent}\n\`\`\``;
|
| 31 |
+
}
|
| 32 |
+
};
|
| 33 |
+
}
|
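A minimal sketch of the renderer applied directly to a simplified Notion code block object; how the renderer is actually registered with notion-to-md depends on the library version in use, so that wiring is deliberately left out:

```js
// Illustrative only: call the custom renderer on a hand-built block object
// shaped like the Notion API's "code" block (simplified).
import { createCustomCodeRenderer } from './custom-code-renderer.mjs';

const renderer = createCustomCodeRenderer();

const block = {
  code: {
    language: 'python',
    rich_text: [{ plain_text: 'print("hello")' }],
  },
};

console.log(renderer.code(block));
// Prints a fenced python code block containing print("hello").
```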
app/scripts/notion-importer/debug-properties.mjs
ADDED
|
@@ -0,0 +1,87 @@
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
import { config } from 'dotenv';
|
| 4 |
+
import { Client } from '@notionhq/client';
|
| 5 |
+
|
| 6 |
+
// Load environment variables from .env file
|
| 7 |
+
config();
|
| 8 |
+
|
| 9 |
+
const notion = new Client({
|
| 10 |
+
auth: process.env.NOTION_TOKEN,
|
| 11 |
+
});
|
| 12 |
+
|
| 13 |
+
async function debugPageProperties() {
|
| 14 |
+
const pageId = '27877f1c9c9d804d9c82f7b3905578ff';
|
| 15 |
+
|
| 16 |
+
try {
|
| 17 |
+
console.log('🔍 Debugging page properties...');
|
| 18 |
+
console.log(`📄 Page ID: ${pageId}`);
|
| 19 |
+
|
| 20 |
+
const page = await notion.pages.retrieve({ page_id: pageId });
|
| 21 |
+
|
| 22 |
+
console.log('\n📋 Available properties:');
|
| 23 |
+
console.log('========================');
|
| 24 |
+
|
| 25 |
+
for (const [key, value] of Object.entries(page.properties)) {
|
| 26 |
+
console.log(`\n🔹 ${key}:`);
|
| 27 |
+
console.log(` Type: ${value.type}`);
|
| 28 |
+
|
| 29 |
+
switch (value.type) {
|
| 30 |
+
case 'title':
|
| 31 |
+
console.log(` Value: "${value.title.map(t => t.plain_text).join('')}"`);
|
| 32 |
+
break;
|
| 33 |
+
case 'rich_text':
|
| 34 |
+
console.log(` Value: "${value.rich_text.map(t => t.plain_text).join('')}"`);
|
| 35 |
+
break;
|
| 36 |
+
case 'people':
|
| 37 |
+
console.log(` People: ${value.people.map(p => p.name || p.id).join(', ')}`);
|
| 38 |
+
break;
|
| 39 |
+
case 'select':
|
| 40 |
+
console.log(` Value: ${value.select?.name || 'null'}`);
|
| 41 |
+
break;
|
| 42 |
+
case 'multi_select':
|
| 43 |
+
console.log(` Values: [${value.multi_select.map(s => s.name).join(', ')}]`);
|
| 44 |
+
break;
|
| 45 |
+
case 'date':
|
| 46 |
+
console.log(` Value: ${value.date?.start || 'null'}`);
|
| 47 |
+
break;
|
| 48 |
+
case 'checkbox':
|
| 49 |
+
console.log(` Value: ${value.checkbox}`);
|
| 50 |
+
break;
|
| 51 |
+
case 'url':
|
| 52 |
+
console.log(` Value: ${value.url || 'null'}`);
|
| 53 |
+
break;
|
| 54 |
+
case 'email':
|
| 55 |
+
console.log(` Value: ${value.email || 'null'}`);
|
| 56 |
+
break;
|
| 57 |
+
case 'phone_number':
|
| 58 |
+
console.log(` Value: ${value.phone_number || 'null'}`);
|
| 59 |
+
break;
|
| 60 |
+
case 'number':
|
| 61 |
+
console.log(` Value: ${value.number || 'null'}`);
|
| 62 |
+
break;
|
| 63 |
+
case 'created_time':
|
| 64 |
+
console.log(` Value: ${value.created_time}`);
|
| 65 |
+
break;
|
| 66 |
+
case 'created_by':
|
| 67 |
+
console.log(` Value: ${value.created_by?.id || 'null'}`);
|
| 68 |
+
break;
|
| 69 |
+
case 'last_edited_time':
|
| 70 |
+
console.log(` Value: ${value.last_edited_time}`);
|
| 71 |
+
break;
|
| 72 |
+
case 'last_edited_by':
|
| 73 |
+
console.log(` Value: ${value.last_edited_by?.id || 'null'}`);
|
| 74 |
+
break;
|
| 75 |
+
default:
|
| 76 |
+
console.log(` Value: ${JSON.stringify(value, null, 2)}`);
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
console.log('\n✅ Properties debug completed!');
|
| 81 |
+
|
| 82 |
+
} catch (error) {
|
| 83 |
+
console.error('❌ Error:', error.message);
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
debugPageProperties();
|
app/scripts/notion-importer/input/pages.json
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d51fba4ce9b05562f5df611a150e3cd702b487d2e608441318336556e0f248a
|
| 3 |
+
size 188
|
app/scripts/notion-importer/mdx-converter.mjs
ADDED
|
@@ -0,0 +1,551 @@
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
|
| 3 |
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, statSync } from 'fs';
|
| 4 |
+
import { join, dirname, basename, extname } from 'path';
|
| 5 |
+
import { fileURLToPath } from 'url';
|
| 6 |
+
import matter from 'gray-matter';
|
| 7 |
+
import { extractAndGenerateNotionFrontmatter } from './notion-metadata-extractor.mjs';
|
| 8 |
+
|
| 9 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 10 |
+
const __dirname = dirname(__filename);
|
| 11 |
+
|
| 12 |
+
// Configuration
|
| 13 |
+
const DEFAULT_INPUT = join(__dirname, 'output');
|
| 14 |
+
const DEFAULT_OUTPUT = join(__dirname, 'output');
|
| 15 |
+
|
| 16 |
+
function parseArgs() {
|
| 17 |
+
const args = process.argv.slice(2);
|
| 18 |
+
const config = {
|
| 19 |
+
input: DEFAULT_INPUT,
|
| 20 |
+
output: DEFAULT_OUTPUT,
|
| 21 |
+
};
|
| 22 |
+
|
| 23 |
+
for (const arg of args) {
|
| 24 |
+
if (arg.startsWith('--input=')) {
|
| 25 |
+
config.input = arg.substring('--input='.length);
|
| 26 |
+
} else if (arg.startsWith('--output=')) {
|
| 27 |
+
config.output = arg.substring('--output='.length);
|
| 28 |
+
} else if (arg === '--help' || arg === '-h') {
|
| 29 |
+
console.log(`
|
| 30 |
+
📝 Notion Markdown to MDX Converter
|
| 31 |
+
|
| 32 |
+
Usage:
|
| 33 |
+
node mdx-converter.mjs [options]
|
| 34 |
+
|
| 35 |
+
Options:
|
| 36 |
+
--input=PATH Input directory or file (default: ${DEFAULT_INPUT})
|
| 37 |
+
--output=PATH Output directory (default: ${DEFAULT_OUTPUT})
|
| 38 |
+
--help, -h Show this help
|
| 39 |
+
|
| 40 |
+
Examples:
|
| 41 |
+
# Convert all markdown files in output directory
|
| 42 |
+
node mdx-converter.mjs
|
| 43 |
+
|
| 44 |
+
# Convert specific file
|
| 45 |
+
node mdx-converter.mjs --input=article.md --output=converted/
|
| 46 |
+
|
| 47 |
+
# Convert directory
|
| 48 |
+
node mdx-converter.mjs --input=markdown-files/ --output=mdx-files/
|
| 49 |
+
`);
|
| 50 |
+
process.exit(0);
|
| 51 |
+
} else if (!config.input) {
|
| 52 |
+
config.input = arg;
|
| 53 |
+
} else if (!config.output) {
|
| 54 |
+
config.output = arg;
|
| 55 |
+
}
|
| 56 |
+
}
|
| 57 |
+
return config;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
/**
|
| 61 |
+
* Track which Astro components are used during transformations
|
| 62 |
+
*/
|
| 63 |
+
const usedComponents = new Set();
|
| 64 |
+
|
| 65 |
+
/**
|
| 66 |
+
* Track individual image imports needed
|
| 67 |
+
*/
|
| 68 |
+
const imageImports = new Map(); // src -> varName
|
| 69 |
+
|
| 70 |
+
/**
|
| 71 |
+
* Generate a variable name from image path
|
| 72 |
+
* @param {string} src - Image source path
|
| 73 |
+
* @returns {string} - Valid variable name
|
| 74 |
+
*/
|
| 75 |
+
function generateImageVarName(src) {
|
| 76 |
+
// Extract filename without extension and make it a valid JS variable
|
| 77 |
+
const filename = src.split('/').pop().replace(/\.[^.]+$/, '');
|
| 78 |
+
return filename.replace(/[^a-zA-Z0-9]/g, '_').replace(/^[0-9]/, 'img_$&');
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
/**
|
| 82 |
+
* Add required component imports to the frontmatter
|
| 83 |
+
* @param {string} content - MDX content
|
| 84 |
+
* @returns {string} - Content with component imports
|
| 85 |
+
*/
|
| 86 |
+
function addComponentImports(content) {
|
| 87 |
+
console.log(' 📦 Adding component and image imports...');
|
| 88 |
+
|
| 89 |
+
let imports = [];
|
| 90 |
+
|
| 91 |
+
// Add component imports
|
| 92 |
+
if (usedComponents.size > 0) {
|
| 93 |
+
const componentImports = Array.from(usedComponents)
|
| 94 |
+
.map(component => `import ${component} from '../components/${component}.astro';`);
|
| 95 |
+
imports.push(...componentImports);
|
| 96 |
+
console.log(` ✅ Importing components: ${Array.from(usedComponents).join(', ')}`);
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
// Add image imports
|
| 100 |
+
if (imageImports.size > 0) {
|
| 101 |
+
const imageImportStatements = Array.from(imageImports.entries())
|
| 102 |
+
.map(([src, varName]) => `import ${varName} from '${src}';`);
|
| 103 |
+
imports.push(...imageImportStatements);
|
| 104 |
+
console.log(` ✅ Importing ${imageImports.size} image(s)`);
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
if (imports.length === 0) {
|
| 108 |
+
console.log(' ℹ️ No imports needed');
|
| 109 |
+
return content;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
const importBlock = imports.join('\n');
|
| 113 |
+
|
| 114 |
+
// Insert imports after frontmatter
|
| 115 |
+
const frontmatterEnd = content.indexOf('---', 3) + 3;
|
| 116 |
+
if (frontmatterEnd > 2) {
|
| 117 |
+
return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n' + content.slice(frontmatterEnd);
|
| 118 |
+
} else {
|
| 119 |
+
// No frontmatter, add at beginning
|
| 120 |
+
return importBlock + '\n\n' + content;
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+

/**
 * Transform Notion images to Figure components
 * @param {string} content - MDX content
 * @returns {string} - Content with Figure components
 */
function transformImages(content) {
  console.log(' 🖼️ Transforming images to Figure components...');

  let hasImages = false;

  // Helper function to clean source paths
  const cleanSrcPath = (src) => {
    // Convert Notion media paths to relative paths
    return src.replace(/^\/media\//, './media/')
              .replace(/^\.\/media\//, './media/');
  };

  // Helper to clean caption text
  const cleanCaption = (caption) => {
    return caption
      .replace(/<[^>]*>/g, '')  // Remove HTML tags
      .replace(/\n/g, ' ')      // Replace newlines with spaces
      .replace(/\r/g, ' ')      // Replace carriage returns with spaces
      .replace(/\s+/g, ' ')     // Replace multiple spaces with single space
      .replace(/'/g, "\\'")     // Escape quotes
      .trim();                  // Trim whitespace
  };

  // Helper to clean alt text
  const cleanAltText = (alt, maxLength = 100) => {
    const cleaned = alt
      .replace(/<[^>]*>/g, '')  // Remove HTML tags
      .replace(/\n/g, ' ')      // Replace newlines with spaces
      .replace(/\r/g, ' ')      // Replace carriage returns with spaces
      .replace(/\s+/g, ' ')     // Replace multiple spaces with single space
      .trim();                  // Trim whitespace

    return cleaned.length > maxLength
      ? cleaned.substring(0, maxLength) + '...'
      : cleaned;
  };

  // Create Figure component with import
  const createFigureComponent = (src, alt = '', caption = '') => {
    const cleanSrc = cleanSrcPath(src);

    // Skip PDF URLs and external URLs - they should remain as links only
    if (cleanSrc.includes('.pdf') || cleanSrc.includes('arxiv.org/pdf') ||
        (cleanSrc.startsWith('http') && !cleanSrc.includes('/media/'))) {
      console.log(` ⚠️ Skipping external/PDF URL: ${cleanSrc}`);
      // Return the original markdown image syntax for external URLs
      return `![${alt}](${src})`;
    }

    const varName = generateImageVarName(cleanSrc);
    imageImports.set(cleanSrc, varName);
    usedComponents.add('Figure');

    const props = [];
    props.push(`src={${varName}}`);
    props.push('zoomable');
    props.push('downloadable');
    props.push('layout="fixed"');
    if (alt) props.push(`alt="${alt}"`);
    if (caption) props.push(`caption={'${caption}'}`);

    return `<Figure\n  ${props.join('\n  ')}\n/>`;
  };

  // Transform markdown images: ![alt](src)
  content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
    const cleanSrc = cleanSrcPath(src);
    const cleanAlt = cleanAltText(alt || 'Figure');
    hasImages = true;

    return createFigureComponent(cleanSrc, cleanAlt);
  });

  // Transform images with captions (Notion sometimes adds captions as separate text)
  content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)\s*\n\s*([^\n]+)/g, (match, alt, src, caption) => {
    const cleanSrc = cleanSrcPath(src);
    const cleanAlt = cleanAltText(alt || 'Figure');
    const cleanCap = cleanCaption(caption);
    hasImages = true;

    return createFigureComponent(cleanSrc, cleanAlt, cleanCap);
  });

  if (hasImages) {
    console.log(' ✅ Figure components with imports will be created');
  }

  return content;
}

/**
 * Transform Notion callouts to Note components
 * @param {string} content - MDX content
 * @returns {string} - Content with Note components
 */
function transformCallouts(content) {
  console.log(' 📝 Transforming callouts to Note components...');

  let transformedCount = 0;

  // Transform blockquotes that look like Notion callouts
  content = content.replace(/^> \*\*([^*]+)\*\*\s*\n> (.+?)(?=\n> \*\*|\n\n|\n$)/gms, (match, title, content) => {
    transformedCount++;
    usedComponents.add('Note');

    const cleanContent = content
      .replace(/^> /gm, '')   // Remove blockquote markers
      .replace(/\n+/g, '\n')  // Normalize newlines
      .trim();

    return `<Note type="${title.toLowerCase()}" title="${title}">\n${cleanContent}\n</Note>\n\n`;
  });

  if (transformedCount > 0) {
    console.log(` ✅ Transformed ${transformedCount} callout(s) to Note components`);
  }

  return content;
}

/**
 * Transform Notion databases/tables to enhanced table components
 * @param {string} content - MDX content
 * @returns {string} - Content with enhanced tables
 */
function transformTables(content) {
  console.log(' 📊 Enhancing tables...');

  let enhancedCount = 0;

  // Wrap tables in a container for better styling
  content = content.replace(/^(\|[^|\n]+\|[\s\S]*?)(?=\n\n|\n$)/gm, (match) => {
    if (match.includes('|') && match.split('\n').length > 2) {
      enhancedCount++;
      return `<div class="table-container">\n\n${match}\n\n</div>`;
    }
    return match;
  });

  if (enhancedCount > 0) {
    console.log(` ✅ Enhanced ${enhancedCount} table(s)`);
  }

  return content;
}

/**
 * Transform Notion code blocks to enhanced code components
 * @param {string} content - MDX content
 * @returns {string} - Content with enhanced code blocks
 */
function transformCodeBlocks(content) {
  console.log(' 💻 Enhancing code blocks...');

  let enhancedCount = 0;

  // Add copy functionality to code blocks
  content = content.replace(/^```(\w+)\n([\s\S]*?)\n```$/gm, (match, lang, code) => {
    enhancedCount++;
    return `\`\`\`${lang} copy\n${code}\n\`\`\``;
  });

  if (enhancedCount > 0) {
    console.log(` ✅ Enhanced ${enhancedCount} code block(s)`);
  }

  return content;
}

/**
 * Fix Notion-specific formatting issues
 * @param {string} content - MDX content
 * @returns {string} - Content with fixed formatting
 */
function fixNotionFormatting(content) {
  console.log(' 🔧 Fixing Notion formatting issues...');

  let fixedCount = 0;

  // Fix Notion's toggle lists that don't convert well
  content = content.replace(/^(\s*)•\s*(.+)$/gm, (match, indent, text) => {
    fixedCount++;
    return `${indent}- ${text}`;
  });

  // Fix Notion's numbered lists that might have issues
  content = content.replace(/^(\s*)\d+\.\s*(.+)$/gm, (match, indent, text) => {
    // Only fix if it's not already properly formatted
    if (!text.includes('\n') || text.split('\n').length === 1) {
      return match; // Keep as is
    }
    fixedCount++;
    return `${indent}1. ${text}`;
  });

  // Fix Notion's bold/italic combinations
  content = content.replace(/\*\*([^*]+)\*\*([^*]+)\*\*([^*]+)\*\*/g, (match, part1, part2, part3) => {
    fixedCount++;
    return `**${part1}${part2}${part3}**`;
  });

  if (fixedCount > 0) {
    console.log(` ✅ Fixed ${fixedCount} formatting issue(s)`);
  }

  return content;
}

/**
 * Ensure proper frontmatter for MDX with Notion metadata
 * @param {string} content - MDX content
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 * @returns {string} - Content with proper frontmatter
 */
async function ensureFrontmatter(content, pageId = null, notionToken = null) {
  console.log(' 📄 Ensuring proper frontmatter...');

  if (!content.startsWith('---')) {
    let frontmatter;

    if (pageId && notionToken) {
      try {
        console.log(' 🔍 Extracting Notion metadata...');
        frontmatter = await extractAndGenerateNotionFrontmatter(pageId, notionToken);
        console.log(' ✅ Generated rich frontmatter from Notion');
      } catch (error) {
        console.log(' ⚠️ Failed to extract Notion metadata, using basic frontmatter');
        frontmatter = generateBasicFrontmatter();
      }
    } else {
      frontmatter = generateBasicFrontmatter();
      console.log(' ✅ Generated basic frontmatter');
    }

    return frontmatter + content;
  }

  // Parse existing frontmatter and enhance it
  try {
    const { data, content: body } = matter(content);

    // If we have Notion metadata available, try to enhance the frontmatter
    if (pageId && notionToken && (!data.notion_id || data.notion_id !== pageId)) {
      try {
        console.log(' 🔍 Enhancing frontmatter with Notion metadata...');
        const notionFrontmatter = await extractAndGenerateNotionFrontmatter(pageId, notionToken);
        const { data: notionData } = matter(notionFrontmatter);

        // Merge Notion metadata with existing frontmatter
        const enhancedData = { ...data, ...notionData };
        const enhancedContent = matter.stringify(body, enhancedData);
        console.log(' ✅ Enhanced frontmatter with Notion metadata');
        return enhancedContent;
      } catch (error) {
        console.log(' ⚠️ Could not enhance with Notion metadata, keeping existing');
      }
    }

    // Ensure required fields
    if (!data.title) data.title = 'Notion Article';
    if (!data.published) data.published = new Date().toISOString().split('T')[0];
    if (!data.tableOfContentsAutoCollapse) data.tableOfContentsAutoCollapse = true;

    const enhancedContent = matter.stringify(body, data);
    console.log(' ✅ Enhanced existing frontmatter');
    return enhancedContent;
  } catch (error) {
    console.log(' ⚠️ Could not parse frontmatter, keeping as is');
    return content;
  }
}

/**
 * Generate basic frontmatter
 * @returns {string} - Basic frontmatter
 */
function generateBasicFrontmatter() {
  const currentDate = new Date().toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'short',
    day: '2-digit'
  });
  return `---
title: "Notion Article"
published: "${currentDate}"
tableOfContentsAutoCollapse: true
---

`;
}

/**
 * Main MDX processing function that applies all transformations
 * @param {string} content - Raw Markdown content
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 * @returns {string} - Processed MDX content compatible with Astro
 */
async function processMdxContent(content, pageId = null, notionToken = null) {
  console.log('🔧 Processing for Astro MDX compatibility...');

  // Clear previous tracking
  usedComponents.clear();
  imageImports.clear();

  let processedContent = content;

  // Apply each transformation step sequentially
  processedContent = await ensureFrontmatter(processedContent, pageId, notionToken);
  processedContent = fixNotionFormatting(processedContent);
  processedContent = transformCallouts(processedContent);
  processedContent = transformImages(processedContent);
  processedContent = transformTables(processedContent);
  processedContent = transformCodeBlocks(processedContent);

  // Add component imports at the end
  processedContent = addComponentImports(processedContent);

  return processedContent;
}

/**
 * Convert a single markdown file to MDX
 * @param {string} inputFile - Input markdown file
 * @param {string} outputDir - Output directory
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 */
async function convertFileToMdx(inputFile, outputDir, pageId = null, notionToken = null) {
  const filename = basename(inputFile, '.md');
  const outputFile = join(outputDir, `${filename}.mdx`);

  console.log(`📝 Converting: ${basename(inputFile)} → ${basename(outputFile)}`);

  try {
    const markdownContent = readFileSync(inputFile, 'utf8');
    const mdxContent = await processMdxContent(markdownContent, pageId, notionToken);
    writeFileSync(outputFile, mdxContent);

    console.log(` ✅ Converted: ${outputFile}`);

    // Show file size
    const inputSize = Math.round(markdownContent.length / 1024);
    const outputSize = Math.round(mdxContent.length / 1024);
    console.log(` 📊 Input: ${inputSize}KB → Output: ${outputSize}KB`);

  } catch (error) {
    console.error(` ❌ Failed to convert ${inputFile}: ${error.message}`);
  }
}

/**
 * Convert all markdown files in a directory to MDX
 * @param {string} inputPath - Input path (file or directory)
 * @param {string} outputDir - Output directory
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 */
async function convertToMdx(inputPath, outputDir, pageId = null, notionToken = null) {
  console.log('📝 Notion Markdown to Astro MDX Converter');
  console.log(`📁 Input: ${inputPath}`);
  console.log(`📁 Output: ${outputDir}`);

  // Check if input exists
  if (!existsSync(inputPath)) {
    console.error(`❌ Input not found: ${inputPath}`);
    process.exit(1);
  }

  try {
    // Ensure output directory exists
    if (!existsSync(outputDir)) {
      mkdirSync(outputDir, { recursive: true });
    }

    let filesToConvert = [];

    if (statSync(inputPath).isDirectory()) {
      // Convert all .md files in directory
      const files = readdirSync(inputPath);
      filesToConvert = files
        .filter(file => file.endsWith('.md'))
        .map(file => join(inputPath, file));
    } else if (inputPath.endsWith('.md')) {
      // Convert single file
      filesToConvert = [inputPath];
    } else {
      console.error('❌ Input must be a .md file or directory containing .md files');
      process.exit(1);
    }

    if (filesToConvert.length === 0) {
      console.log('ℹ️ No .md files found to convert');
      return;
    }

    console.log(`🔄 Found ${filesToConvert.length} file(s) to convert`);

    // Convert each file
    for (const file of filesToConvert) {
      await convertFileToMdx(file, outputDir, pageId, notionToken);
    }

    console.log(`✅ Conversion completed! ${filesToConvert.length} file(s) processed`);

  } catch (error) {
    console.error('❌ Conversion failed:', error.message);
    process.exit(1);
  }
}

export { convertToMdx };

async function main() {
  const config = parseArgs();
  // Await the async conversion so the completion message only prints once all files are processed.
  await convertToMdx(config.input, config.output);
  console.log('🎉 MDX conversion completed!');
}

if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}
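For orientation, here is a minimal sketch of how the `convertToMdx` entry point exported above might be driven from another Node script. The file name, paths, and environment variable names are illustrative placeholders, not part of this diff:

```js
// driver.mjs — hypothetical example, not included in this PR
import { convertToMdx } from './mdx-converter.mjs';

// Convert every .md file produced by the Notion importer into Astro-ready MDX.
// The page ID and token are optional; when present they are used to enrich the frontmatter.
await convertToMdx(
  './output',                  // directory of Notion-exported .md files (placeholder)
  '../src/content/articles',   // destination for the generated .mdx files (placeholder)
  process.env.NOTION_PAGE_ID,  // optional Notion page ID
  process.env.NOTION_TOKEN     // optional Notion API token
);
```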
app/scripts/notion-importer/notion-converter.mjs
ADDED
@@ -0,0 +1,259 @@
#!/usr/bin/env node

import { config } from 'dotenv';
import { Client } from '@notionhq/client';
import { NotionConverter } from 'notion-to-md';
import { DefaultExporter } from 'notion-to-md/plugins/exporter';
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { join, dirname, basename } from 'path';
import { fileURLToPath } from 'url';
import { postProcessMarkdown } from './post-processor.mjs';
import { createCustomCodeRenderer } from './custom-code-renderer.mjs';

// Load environment variables from .env file
config();

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Configuration
const DEFAULT_INPUT = join(__dirname, 'input', 'pages.json');
const DEFAULT_OUTPUT = join(__dirname, 'output');

function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    input: DEFAULT_INPUT,
    output: DEFAULT_OUTPUT,
    clean: false,
    token: process.env.NOTION_TOKEN
  };

  for (const arg of args) {
    if (arg.startsWith('--input=')) {
      config.input = arg.split('=')[1];
    } else if (arg.startsWith('--output=')) {
      config.output = arg.split('=')[1];
    } else if (arg.startsWith('--token=')) {
      config.token = arg.split('=')[1];
    } else if (arg === '--clean') {
      config.clean = true;
    }
  }

  return config;
}

function ensureDirectory(dir) {
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
}

function loadPagesConfig(configFile) {
  if (!existsSync(configFile)) {
    console.error(`❌ Configuration file not found: ${configFile}`);
    console.log('📝 Create a pages.json file with your Notion page IDs:');
    console.log(`
{
  "pages": [
    {
      "id": "your-notion-page-id-1",
      "title": "Page Title 1",
      "slug": "page-1"
    },
    {
      "id": "your-notion-page-id-2",
      "title": "Page Title 2",
      "slug": "page-2"
    }
  ]
}
`);
    process.exit(1);
  }

  try {
    const config = JSON.parse(readFileSync(configFile, 'utf8'));
    return config.pages || [];
  } catch (error) {
    console.error(`❌ Error reading configuration: ${error.message}`);
    process.exit(1);
  }
}

/**
 * Convert a single Notion page to Markdown with advanced media handling
 * @param {Object} notion - Notion client
 * @param {string} pageId - Notion page ID
 * @param {string} outputDir - Output directory
 * @param {string} pageTitle - Page title for file naming
 * @returns {Promise<string>} - Path to generated markdown file
 */
async function convertNotionPage(notion, pageId, outputDir, pageTitle) {
  console.log(`📄 Converting Notion page: ${pageTitle} (${pageId})`);

  try {
    // Create media directory for this page
    const mediaDir = join(outputDir, 'media', pageId);
    ensureDirectory(mediaDir);

    // Configure the DefaultExporter to save to a file
    const outputFile = join(outputDir, `${pageTitle}.md`);
    const exporter = new DefaultExporter({
      outputType: 'file',
      outputPath: outputFile,
    });

    // Create the converter with media downloading strategy
    const n2m = new NotionConverter(notion)
      .withExporter(exporter)
      // Download media to local directory with path transformation
      .downloadMediaTo({
        outputDir: mediaDir,
        // Transform paths to be web-accessible
        transformPath: (localPath) => `/media/${pageId}/${basename(localPath)}`,
      });

    // Convert the page
    const result = await n2m.convert(pageId);

    console.log(` ✅ Converted to: ${outputFile}`);
    console.log(` 📊 Content length: ${result.content.length} characters`);
    console.log(` 🖼️ Media saved to: ${mediaDir}`);

    return outputFile;

  } catch (error) {
    console.error(` ❌ Failed to convert page ${pageId}: ${error.message}`);
    throw error;
  }
}

/**
 * Process Notion pages with advanced configuration
 * @param {string} inputFile - Path to pages configuration
 * @param {string} outputDir - Output directory
 * @param {string} notionToken - Notion API token
 */
export async function convertNotionToMarkdown(inputFile, outputDir, notionToken) {
  console.log('🚀 Notion to Markdown Converter');
  console.log(`📁 Input: ${inputFile}`);
  console.log(`📁 Output: ${outputDir}`);

  // Validate Notion token
  if (!notionToken) {
    console.error('❌ NOTION_TOKEN not found. Please set it as environment variable or use --token=YOUR_TOKEN');
    process.exit(1);
  }

  // Ensure output directory exists
  ensureDirectory(outputDir);

  try {
    // Initialize Notion client
    const notion = new Client({
      auth: notionToken,
    });

    // Load pages configuration
    const pages = loadPagesConfig(inputFile);
    console.log(`📋 Found ${pages.length} page(s) to convert`);

    const convertedFiles = [];

    // Convert each page
    for (const page of pages) {
      try {
        const outputFile = await convertNotionPage(
          notion,
          page.id,
          outputDir,
          page.slug || page.title?.toLowerCase().replace(/\s+/g, '-') || page.id
        );
        convertedFiles.push(outputFile);
      } catch (error) {
        console.error(`❌ Failed to convert page ${page.id}: ${error.message}`);
        // Continue with other pages
      }
    }

    // Post-process all converted files
    console.log('🔧 Post-processing converted files...');
    for (const file of convertedFiles) {
      try {
        let content = readFileSync(file, 'utf8');
        content = postProcessMarkdown(content);
        writeFileSync(file, content);
        console.log(` ✅ Post-processed: ${basename(file)}`);
      } catch (error) {
        console.error(` ❌ Failed to post-process ${file}: ${error.message}`);
      }
    }

    console.log(`✅ Conversion completed! ${convertedFiles.length} file(s) generated`);

  } catch (error) {
    console.error('❌ Conversion failed:', error.message);
    process.exit(1);
  }
}

async function main() {
  const config = parseArgs();

  if (config.clean) {
    console.log('🧹 Cleaning output directory...');
    // Clean output directory logic would go here
  }

  // Await the async conversion so the completion message only prints after it actually finishes.
  await convertNotionToMarkdown(config.input, config.output, config.token);
  console.log('🎉 Notion conversion completed!');
}

// Show help if requested
if (process.argv.includes('--help') || process.argv.includes('-h')) {
  console.log(`
🚀 Notion to Markdown Converter

Usage:
  node notion-converter.mjs [options]

Options:
  --input=PATH    Input pages configuration file (default: input/pages.json)
  --output=PATH   Output directory (default: output/)
  --token=TOKEN   Notion API token (or set NOTION_TOKEN env var)
  --clean         Clean output directory before conversion
  --help, -h      Show this help

Environment Variables:
  NOTION_TOKEN    Your Notion integration token

Examples:
  # Basic conversion with environment token
  NOTION_TOKEN=your_token node notion-converter.mjs

  # Custom paths and token
  node notion-converter.mjs --input=my-pages.json --output=converted/ --token=your_token

  # Clean output first
  node notion-converter.mjs --clean

Configuration File Format (pages.json):
{
  "pages": [
    {
      "id": "your-notion-page-id",
      "title": "Page Title",
      "slug": "page-slug"
    }
  ]
}
`);
  process.exit(0);
}

// Run CLI if called directly
if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}
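As a quick reference, a hedged sketch of how `convertNotionToMarkdown` could be called programmatically, assuming a `pages.json` in the format documented in the help text above; the script name and paths are placeholders:

```js
// fetch-pages.mjs — hypothetical example, not included in this PR
import { convertNotionToMarkdown } from './notion-converter.mjs';

// pages.json lists the Notion page IDs to export, as shown in the --help output above.
await convertNotionToMarkdown(
  './input/pages.json',      // pages configuration file
  './output',                // where .md files and downloaded media are written
  process.env.NOTION_TOKEN   // Notion integration token
);
```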
app/scripts/notion-importer/notion-metadata-extractor.mjs
ADDED
@@ -0,0 +1,303 @@
#!/usr/bin/env node

import { Client } from '@notionhq/client';

/**
 * Notion Metadata Extractor
 * Extracts document metadata from Notion pages for frontmatter generation
 */

/**
 * Extract metadata from Notion page
 * @param {string} pageId - Notion page ID
 * @param {string} notionToken - Notion API token
 * @returns {object} - Extracted metadata object
 */
export async function extractNotionMetadata(pageId, notionToken) {
  const notion = new Client({
    auth: notionToken,
  });

  const metadata = {};

  try {
    // Get page information
    const page = await notion.pages.retrieve({ page_id: pageId });

    // Extract title from page properties
    if (page.properties.title && page.properties.title.title && page.properties.title.title.length > 0) {
      metadata.title = page.properties.title.title[0].plain_text;
    }

    // Extract creation date
    if (page.created_time) {
      metadata.published = new Date(page.created_time).toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'short',
        day: '2-digit'
      });
      metadata.created_time = page.created_time;
    }

    // Extract last edited date
    if (page.last_edited_time) {
      metadata.last_edited_time = page.last_edited_time;
    }

    // Extract created by
    if (page.created_by && page.created_by.id) {
      metadata.created_by = page.created_by.id;
    }

    // Extract last edited by
    if (page.last_edited_by && page.last_edited_by.id) {
      metadata.last_edited_by = page.last_edited_by.id;
    }

    // Extract page URL
    metadata.notion_url = page.url;

    // Extract page ID
    metadata.notion_id = page.id;

    // Extract parent information
    if (page.parent) {
      metadata.parent = {
        type: page.parent.type,
        id: page.parent[page.parent.type]?.id || page.parent[page.parent.type]
      };
    }

    // Extract cover image if available
    if (page.cover) {
      metadata.cover = {
        type: page.cover.type,
        url: page.cover[page.cover.type]?.url || page.cover[page.cover.type]
      };
    }

    // Extract icon if available
    if (page.icon) {
      metadata.icon = {
        type: page.icon.type,
        emoji: page.icon.emoji,
        url: page.icon.external?.url || page.icon.file?.url
      };
    }

    // Extract authors and custom properties
    const customProperties = {};
    for (const [key, value] of Object.entries(page.properties)) {
      if (key !== 'title') { // Skip title as it's handled separately
        const extractedValue = extractPropertyValue(value);

        // Check for author-related properties
        if (key.toLowerCase().includes('author') ||
            key.toLowerCase().includes('writer') ||
            key.toLowerCase().includes('creator') ||
            value.type === 'people') {
          metadata.authors = extractedValue;
        } else {
          customProperties[key] = extractedValue;
        }
      }
    }

    // If no authors found in properties, try to get from created_by
    if (!metadata.authors && page.created_by) {
      try {
        const user = await notion.users.retrieve({ user_id: page.created_by.id });
        metadata.authors = [{
          name: user.name || user.id,
          id: user.id
        }];
      } catch (error) {
        console.log(' ⚠️ Could not fetch author from created_by:', error.message);
        // Fallback to basic info
        metadata.authors = [{
          name: page.created_by.name || page.created_by.id,
          id: page.created_by.id
        }];
      }
    }

    if (Object.keys(customProperties).length > 0) {
      metadata.properties = customProperties;
    }

    // Try to extract description from page content (first paragraph)
    try {
      const blocks = await notion.blocks.children.list({ block_id: pageId });
      const firstParagraph = blocks.results.find(block =>
        block.type === 'paragraph' &&
        block.paragraph.rich_text &&
        block.paragraph.rich_text.length > 0
      );

      if (firstParagraph) {
        const description = firstParagraph.paragraph.rich_text
          .map(text => text.plain_text)
          .join('')
          .trim();

        if (description && description.length > 0) {
          metadata.description = description.substring(0, 200) + (description.length > 200 ? '...' : '');
        }
      }
    } catch (error) {
      console.log(' ⚠️ Could not extract description from page content');
    }

    // Generate tags from page properties
    const tags = [];
    for (const [key, value] of Object.entries(page.properties)) {
      if (value.type === 'multi_select' && value.multi_select) {
        value.multi_select.forEach(option => {
          tags.push(option.name);
        });
      } else if (value.type === 'select' && value.select) {
        tags.push(value.select.name);
      }
    }

    if (tags.length > 0) {
      metadata.tags = tags;
    }

  } catch (error) {
    console.error('Error extracting Notion metadata:', error.message);
    // Return basic metadata if extraction fails
    metadata.title = "Notion Article";
    metadata.published = new Date().toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'short',
      day: '2-digit'
    });
  }

  return metadata;
}

/**
 * Extract value from Notion property
 * @param {object} property - Notion property object
 * @returns {any} - Extracted value
 */
function extractPropertyValue(property) {
  switch (property.type) {
    case 'rich_text':
      return property.rich_text.map(text => text.plain_text).join('');
    case 'title':
      return property.title.map(text => text.plain_text).join('');
    case 'number':
      return property.number;
    case 'select':
      return property.select?.name || null;
    case 'multi_select':
      return property.multi_select.map(option => option.name);
    case 'date':
      return property.date?.start || null;
    case 'checkbox':
      return property.checkbox;
    case 'url':
      return property.url;
    case 'email':
      return property.email;
    case 'phone_number':
      return property.phone_number;
    case 'created_time':
      return property.created_time;
    case 'created_by':
      return property.created_by?.id || null;
    case 'last_edited_time':
      return property.last_edited_time;
    case 'last_edited_by':
      return property.last_edited_by?.id || null;
    case 'people':
      return property.people.map(person => ({
        name: person.name || person.id,
        id: person.id
      }));
    default:
      return null;
  }
}

/**
 * Generate YAML frontmatter from metadata object
 * @param {object} metadata - Metadata object
 * @returns {string} - YAML frontmatter string
 */
export function generateNotionFrontmatter(metadata) {
  let frontmatter = '---\n';

  // Title
  if (metadata.title) {
    frontmatter += `title: "${metadata.title}"\n`;
  }

  // Description
  if (metadata.description) {
    frontmatter += `description: "${metadata.description}"\n`;
  }

  // Publication date
  if (metadata.published) {
    frontmatter += `published: "${metadata.published}"\n`;
  }

  // Authors
  if (metadata.authors && metadata.authors.length > 0) {
    frontmatter += 'authors:\n';
    metadata.authors.forEach(author => {
      if (typeof author === 'string') {
        frontmatter += `  - name: "${author}"\n`;
      } else if (author.name) {
        frontmatter += `  - name: "${author.name}"\n`;
      }
    });
  }

  // Tags
  if (metadata.tags && metadata.tags.length > 0) {
    frontmatter += 'tags:\n';
    metadata.tags.forEach(tag => {
      frontmatter += `  - "${tag}"\n`;
    });
  }

  // Notion metadata removed - keeping only standard frontmatter fields

  // Cover image
  if (metadata.cover && metadata.cover.url) {
    frontmatter += `cover: "${metadata.cover.url}"\n`;
  }

  // Icon
  if (metadata.icon) {
    if (metadata.icon.emoji) {
      frontmatter += `icon: "${metadata.icon.emoji}"\n`;
    } else if (metadata.icon.url) {
      frontmatter += `icon: "${metadata.icon.url}"\n`;
    }
  }

  // Custom properties removed - keeping frontmatter clean and standard

  // Default Astro configuration
  frontmatter += 'tableOfContentsAutoCollapse: true\n';
  frontmatter += '---\n\n';

  return frontmatter;
}

/**
 * Extract and generate frontmatter from Notion page
 * @param {string} pageId - Notion page ID
 * @param {string} notionToken - Notion API token
 * @returns {string} - Complete YAML frontmatter
 */
export async function extractAndGenerateNotionFrontmatter(pageId, notionToken) {
  const metadata = await extractNotionMetadata(pageId, notionToken);
  return generateNotionFrontmatter(metadata);
}
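To illustrate the extractor's output shape, a small hedged sketch (the script name and page ID are placeholders): `extractAndGenerateNotionFrontmatter` resolves to a YAML block delimited by `---` lines, ready to be prepended to the converted Markdown.

```js
// preview-frontmatter.mjs — hypothetical example, not included in this PR
import { extractAndGenerateNotionFrontmatter } from './notion-metadata-extractor.mjs';

const frontmatter = await extractAndGenerateNotionFrontmatter(
  'your-notion-page-id',     // placeholder page ID
  process.env.NOTION_TOKEN
);

// Expected shape, based on generateNotionFrontmatter above:
// ---
// title: "Page Title"
// published: "Oct 01, 2025"
// tableOfContentsAutoCollapse: true
// ---
console.log(frontmatter);
```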
app/scripts/notion-importer/output/.temp-pages.json
ADDED
Binary file (128 Bytes)
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8013-b668-f14bd1ac0ec0.png
ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8014-834f-d700b623256b.png
ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-801d-841a-e35011491566.png
ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8031-ac8d-c5678af1bdd5.png
ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8048-9b7e-db4fa7485915.png
ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-804d-bd0a-e0b1c15e504f.png
ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8075-ae2e-dc24fe9296ca.png
ADDED