tfrere HF Staff commited on
Commit
fd07c57
·
1 Parent(s): 01fdd2d
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +14 -35
  2. .gitignore +39 -0
  3. CHANGELOG.md +118 -0
  4. CONTRIBUTING.md +196 -0
  5. LICENSE +33 -0
  6. README.md +116 -5
  7. app/astro.config.mjs +76 -0
  8. app/package-lock.json +0 -0
  9. app/plugins/rehype/code-copy.mjs +94 -0
  10. app/plugins/rehype/post-citation.mjs +441 -0
  11. app/plugins/rehype/restore-at-in-code.mjs +22 -0
  12. app/plugins/rehype/wrap-outputs.mjs +38 -0
  13. app/plugins/rehype/wrap-tables.mjs +43 -0
  14. app/plugins/remark/ignore-citations-in-code.mjs +21 -0
  15. app/plugins/remark/output-container.mjs +23 -0
  16. app/plugins/remark/outputs-container.mjs +23 -0
  17. app/postcss.config.mjs +14 -0
  18. app/public/data +1 -0
  19. app/public/scripts/color-palettes.js +274 -0
  20. app/scripts/export-latex.mjs +318 -0
  21. app/scripts/export-pdf.mjs +483 -0
  22. app/scripts/generate-trackio-data.mjs +196 -0
  23. app/scripts/jitter-trackio-data.mjs +129 -0
  24. app/scripts/latex-importer/README.md +169 -0
  25. app/scripts/latex-importer/bib-cleaner.mjs +104 -0
  26. app/scripts/latex-importer/filters/equation-ids.lua +134 -0
  27. app/scripts/latex-importer/index.mjs +138 -0
  28. app/scripts/latex-importer/latex-converter.mjs +330 -0
  29. app/scripts/latex-importer/mdx-converter.mjs +896 -0
  30. app/scripts/latex-importer/metadata-extractor.mjs +170 -0
  31. app/scripts/latex-importer/package-lock.json +0 -0
  32. app/scripts/latex-importer/package.json +0 -0
  33. app/scripts/latex-importer/post-processor.mjs +439 -0
  34. app/scripts/latex-importer/reference-preprocessor.mjs +239 -0
  35. app/scripts/notion-importer/.cursorignore +1 -0
  36. app/scripts/notion-importer/.notion-to-md/media/27877f1c-9c9d-804d-9c82-f7b3905578ff_media.json +3 -0
  37. app/scripts/notion-importer/custom-code-renderer.mjs +33 -0
  38. app/scripts/notion-importer/debug-properties.mjs +87 -0
  39. app/scripts/notion-importer/input/pages.json +3 -0
  40. app/scripts/notion-importer/mdx-converter.mjs +551 -0
  41. app/scripts/notion-importer/notion-converter.mjs +259 -0
  42. app/scripts/notion-importer/notion-metadata-extractor.mjs +303 -0
  43. app/scripts/notion-importer/output/.temp-pages.json +0 -0
  44. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8013-b668-f14bd1ac0ec0.png +0 -0
  45. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8014-834f-d700b623256b.png +0 -0
  46. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-801d-841a-e35011491566.png +0 -0
  47. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8031-ac8d-c5678af1bdd5.png +0 -0
  48. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8048-9b7e-db4fa7485915.png +0 -0
  49. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-804d-bd0a-e0b1c15e504f.png +0 -0
  50. app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8075-ae2e-dc24fe9296ca.png +0 -0
.gitattributes CHANGED
@@ -1,35 +1,14 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.png filter=lfs diff=lfs merge=lfs -text
2
+ *.jpg filter=lfs diff=lfs merge=lfs -text
3
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
4
+ *.gif filter=lfs diff=lfs merge=lfs -text
5
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
6
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
7
+ *.mov filter=lfs diff=lfs merge=lfs -text
8
+ *.avi filter=lfs diff=lfs merge=lfs -text
9
+ *.wav filter=lfs diff=lfs merge=lfs -text
10
+ *.csv filter=lfs diff=lfs merge=lfs -text
11
+ *.json filter=lfs diff=lfs merge=lfs -text
12
+ # the package and package lock should not be tracked
13
+ package.json -filter -diff -merge text
14
+ package-lock.json -filter -diff -merge text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__
3
+ *.py[cod]
4
+ *.so
5
+ .Python
6
+ env/
7
+ venv/
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ *.egg
12
+ .idea/
13
+ .vscode/
14
+ .astro/
15
+ .claude/
16
+ *.swp
17
+ .DS_Store
18
+ # Node
19
+ node_modules/
20
+ *.log
21
+ *.env
22
+ *.cache
23
+
24
+ app/scripts/latex-to-mdx/output/
25
+ app/src/content/embeds/typography/generated
26
+
27
+ # PDF export
28
+ app/public/*.pdf
29
+ app/public/*.png
30
+ app/public/*.jpg
31
+ app/public/data/**/*
32
+
33
+ .astro/
34
+
35
+ # Template sync temporary directories
36
+ .template-sync/
37
+ .temp-*/
38
+ .backup-*/
39
+
CHANGELOG.md ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to the Research Article Template will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+ - Initial open source release
12
+ - Comprehensive documentation
13
+ - Contributing guidelines
14
+ - License file
15
+
16
+ ## [1.0.0] - 2024-12-19
17
+
18
+ ### Added
19
+ - **Core Features**:
20
+ - Markdown/MDX-based writing system
21
+ - KaTeX mathematical notation support
22
+ - Syntax highlighting for code blocks
23
+ - Academic citations with BibTeX integration
24
+ - Footnotes and sidenotes system
25
+ - Auto-generated table of contents
26
+ - Interactive Mermaid diagrams
27
+ - Plotly.js and D3.js integration
28
+ - HTML embed support
29
+ - Gradio app embedding
30
+ - Dataviz color palettes
31
+ - Image optimization
32
+ - SEO-friendly structure
33
+ - Automatic PDF export
34
+ - Dark/light theme toggle
35
+ - Mobile-responsive design
36
+ - LaTeX import functionality
37
+ - Template synchronization system
38
+
39
+ - **Components**:
40
+ - Figure component with captions
41
+ - MultiFigure for image galleries
42
+ - Note component with variants
43
+ - Quote component
44
+ - Accordion for collapsible content
45
+ - Sidenote component
46
+ - Table of Contents
47
+ - Theme Toggle
48
+ - HTML Embed
49
+ - Raw HTML support
50
+ - SEO component
51
+ - Hero section
52
+ - Footer
53
+ - Full-width and wide layouts
54
+
55
+ - **Build System**:
56
+ - Astro 4.10.0 integration
57
+ - PostCSS with custom media queries
58
+ - Automatic compression
59
+ - Docker support
60
+ - Nginx configuration
61
+ - Git LFS support
62
+
63
+ - **Scripts**:
64
+ - PDF export functionality
65
+ - LaTeX to MDX conversion
66
+ - Template synchronization
67
+ - Font SVG generation
68
+ - TrackIO data generation
69
+
70
+ - **Documentation**:
71
+ - Getting started guide
72
+ - Writing best practices
73
+ - Component reference
74
+ - LaTeX conversion guide
75
+ - Interactive examples
76
+
77
+ ### Technical Details
78
+ - **Framework**: Astro 4.10.0
79
+ - **Styling**: PostCSS with custom properties
80
+ - **Math**: KaTeX 0.16.22
81
+ - **Charts**: Plotly.js 3.1.0, D3.js 7.9.0
82
+ - **Diagrams**: Mermaid 11.10.1
83
+ - **Node.js**: >=20.0.0
84
+ - **License**: CC-BY-4.0
85
+
86
+ ### Browser Support
87
+ - Chrome (latest)
88
+ - Firefox (latest)
89
+ - Safari (latest)
90
+ - Edge (latest)
91
+
92
+ ---
93
+
94
+ ## Version History
95
+
96
+ - **1.0.0**: Initial stable release with full feature set
97
+ - **0.0.1**: Development version (pre-release)
98
+
99
+ ## Migration Guide
100
+
101
+ ### From 0.0.1 to 1.0.0
102
+
103
+ This is the first stable release. No breaking changes from the development version.
104
+
105
+ ### Updating Your Project
106
+
107
+ Use the template synchronization system to update:
108
+
109
+ ```bash
110
+ npm run sync:template -- --dry-run # Preview changes
111
+ npm run sync:template # Apply updates
112
+ ```
113
+
114
+ ## Support
115
+
116
+ - **Documentation**: [Hugging Face Space](https://huggingface.co/spaces/tfrere/research-article-template)
117
+ - **Issues**: [Community Discussions](https://huggingface.co/spaces/tfrere/research-article-template/discussions)
118
+ - **Contact**: [@tfrere](https://huggingface.co/tfrere)
CONTRIBUTING.md ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to Research Article Template
2
+
3
+ Thank you for your interest in contributing to the Research Article Template! This document provides guidelines and information for contributors.
4
+
5
+ ## 🤝 How to Contribute
6
+
7
+ ### Reporting Issues
8
+
9
+ Before creating an issue, please:
10
+ 1. **Search existing issues** to avoid duplicates
11
+ 2. **Use the issue template** when available
12
+ 3. **Provide detailed information**:
13
+ - Clear description of the problem
14
+ - Steps to reproduce
15
+ - Expected vs actual behavior
16
+ - Environment details (OS, Node.js version, browser)
17
+ - Screenshots if applicable
18
+
19
+ ### Suggesting Features
20
+
21
+ We welcome feature suggestions! Please:
22
+ 1. **Check existing discussions** first
23
+ 2. **Describe the use case** clearly
24
+ 3. **Explain the benefits** for the community
25
+ 4. **Consider implementation complexity**
26
+
27
+ ### Code Contributions
28
+
29
+ #### Getting Started
30
+
31
+ 1. **Fork the repository** on Hugging Face
32
+ 2. **Clone your fork**:
33
+ ```bash
34
+ git clone git@hf.co:spaces/<your-username>/research-article-template
35
+ cd research-article-template
36
+ ```
37
+ 3. **Install dependencies**:
38
+ ```bash
39
+ cd app
40
+ npm install
41
+ ```
42
+ 4. **Create a feature branch**:
43
+ ```bash
44
+ git checkout -b feature/your-feature-name
45
+ ```
46
+
47
+ #### Development Workflow
48
+
49
+ 1. **Make your changes** following our coding standards
50
+ 2. **Test thoroughly**:
51
+ ```bash
52
+ npm run dev # Test locally
53
+ npm run build # Ensure build works
54
+ ```
55
+ 3. **Update documentation** if needed
56
+ 4. **Commit with clear messages**:
57
+ ```bash
58
+ git commit -m "feat: add new component for interactive charts"
59
+ ```
60
+
61
+ #### Pull Request Process
62
+
63
+ 1. **Push your branch**:
64
+ ```bash
65
+ git push origin feature/your-feature-name
66
+ ```
67
+ 2. **Create a Pull Request** with:
68
+ - Clear title and description
69
+ - Reference related issues
70
+ - Screenshots for UI changes
71
+ - Testing instructions
72
+
73
+ ## 📋 Coding Standards
74
+
75
+ ### Code Style
76
+
77
+ - **Use Prettier** for consistent formatting
78
+ - **Follow existing patterns** in the codebase
79
+ - **Write clear, self-documenting code**
80
+ - **Add comments** for complex logic
81
+ - **Use meaningful variable names**
82
+
83
+ ### File Organization
84
+
85
+ - **Components**: Place in `src/components/`
86
+ - **Styles**: Use CSS modules or component-scoped styles
87
+ - **Assets**: Organize in `src/content/assets/`
88
+ - **Documentation**: Update relevant `.mdx` files
89
+
90
+ ### Commit Message Format
91
+
92
+ We follow [Conventional Commits](https://www.conventionalcommits.org/):
93
+
94
+ ```
95
+ type(scope): description
96
+
97
+ feat: add new interactive chart component
98
+ fix: resolve mobile layout issues
99
+ docs: update installation instructions
100
+ style: improve button hover states
101
+ refactor: simplify component structure
102
+ test: add unit tests for utility functions
103
+ ```
104
+
105
+ **Types**: `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore`
106
+
107
+ ## 🧪 Testing
108
+
109
+ ### Manual Testing
110
+
111
+ Before submitting:
112
+ - [ ] Test on different screen sizes
113
+ - [ ] Verify dark/light theme compatibility
114
+ - [ ] Check browser compatibility (Chrome, Firefox, Safari)
115
+ - [ ] Test with different content types
116
+ - [ ] Ensure accessibility standards
117
+
118
+ ### Automated Testing
119
+
120
+ ```bash
121
+ # Run build to catch errors
122
+ npm run build
123
+
124
+ # Test PDF export
125
+ npm run export:pdf
126
+
127
+ # Test LaTeX conversion
128
+ npm run latex:convert
129
+ ```
130
+
131
+ ## 📚 Documentation
132
+
133
+ ### Writing Guidelines
134
+
135
+ - **Use clear, concise language**
136
+ - **Provide examples** for complex features
137
+ - **Include screenshots** for UI changes
138
+ - **Update both English content and code comments**
139
+
140
+ ### Documentation Structure
141
+
142
+ - **README.md**: Project overview and quick start
143
+ - **CONTRIBUTING.md**: This file
144
+ - **Content files**: In `src/content/chapters/demo/`
145
+ - **Component docs**: Inline comments and examples
146
+
147
+ ## 🎯 Areas for Contribution
148
+
149
+ ### High Priority
150
+
151
+ - **Bug fixes** and stability improvements
152
+ - **Accessibility enhancements**
153
+ - **Mobile responsiveness**
154
+ - **Performance optimizations**
155
+ - **Documentation improvements**
156
+
157
+ ### Feature Ideas
158
+
159
+ - **New interactive components**
160
+ - **Additional export formats**
161
+ - **Enhanced LaTeX import**
162
+ - **Theme customization**
163
+ - **Plugin system**
164
+
165
+ ### Community
166
+
167
+ - **Answer questions** in discussions
168
+ - **Share examples** of your work
169
+ - **Write tutorials** and guides
170
+ - **Help with translations**
171
+
172
+ ## 🚫 What Not to Contribute
173
+
174
+ - **Breaking changes** without discussion
175
+ - **Major architectural changes** without approval
176
+ - **Dependencies** that significantly increase bundle size
177
+ - **Features** that don't align with the project's goals
178
+
179
+ ## 📞 Getting Help
180
+
181
+ - **Discussions**: [Community tab](https://huggingface.co/spaces/tfrere/research-article-template/discussions)
182
+ - **Issues**: [Report bugs](https://huggingface.co/spaces/tfrere/research-article-template/discussions?status=open&type=issue)
183
+ - **Contact**: [@tfrere](https://huggingface.co/tfrere) on Hugging Face
184
+
185
+ ## 📄 License
186
+
187
+ By contributing, you agree that your contributions will be licensed under the same [CC-BY-4.0 license](LICENSE) that covers the project.
188
+
189
+ ## 🙏 Recognition
190
+
191
+ Contributors will be:
192
+ - **Listed in acknowledgments** (if desired)
193
+ - **Mentioned in release notes** for significant contributions
194
+ - **Credited** in relevant documentation
195
+
196
+ Thank you for helping make scientific writing more accessible and interactive! 🎉
LICENSE ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Creative Commons Attribution 4.0 International License
2
+
3
+ Copyright (c) 2024 Thibaud Frere
4
+
5
+ This work is licensed under the Creative Commons Attribution 4.0 International License.
6
+ To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/
7
+ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
8
+
9
+ You are free to:
10
+
11
+ Share — copy and redistribute the material in any medium or format
12
+ Adapt — remix, transform, and build upon the material for any purpose, even commercially.
13
+
14
+ The licensor cannot revoke these freedoms as long as you follow the license terms.
15
+
16
+ Under the following terms:
17
+
18
+ Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
19
+
20
+ No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
21
+
22
+ Notices:
23
+
24
+ You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation.
25
+
26
+ No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material.
27
+
28
+ ---
29
+
30
+ For the source code and technical implementation:
31
+ - The source code is available at: https://huggingface.co/spaces/tfrere/research-article-template
32
+ - Third-party figures and assets are excluded from this license and marked in their captions
33
+ - Dependencies and third-party libraries maintain their respective licenses
README.md CHANGED
@@ -1,10 +1,121 @@
1
  ---
2
- title: Smollm Blogpost
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
 
 
 
 
 
 
 
 
8
  ---
 
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: 'Bringing paper to life: A modern template for scientific writing'
3
+ emoji: 📝
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
+ header: mini
9
+ app_port: 8080
10
+ tags:
11
+ - research-article-template
12
+ - research paper
13
+ - scientific paper
14
+ - data visualization
15
+ thumbnail: https://huggingface.co/spaces/tfrere/research-paper-template/thumb.jpg
16
  ---
17
+ <div align="center">
18
 
19
+ # Research Article Template
20
+
21
+ [![License: CC BY 4.0](https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/)
22
+ [![Node.js Version](https://img.shields.io/badge/node-%3E%3D20.0.0-brightgreen.svg)](https://nodejs.org/)
23
+ [![Astro](https://img.shields.io/badge/Astro-4.10.0-orange.svg)](https://astro.build/)
24
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/tfrere/research-article-template)
25
+
26
+
27
+ **A modern, interactive template for scientific writing** that brings papers to life with web-native features. The web offers what static PDFs can't: **interactive diagrams**, **progressive notation**, and **exploratory views** that show how ideas behave. This template treats interactive artifacts—figures, math, code, and inspectable experiments—as **first-class** alongside prose, helping readers **build intuition** instead of skimming results—all with **minimal setup** and no web knowledge required.
28
+
29
+ **[Try the live demo & documentation →](https://huggingface.co/spaces/tfrere/research-article-template)**
30
+
31
+ </div>
32
+
33
+ ## 🚀 Quick Start
34
+
35
+ ### Option 1: Duplicate on Hugging Face (Recommended)
36
+
37
+ 1. Visit **[🤗 Research Article Template](https://huggingface.co/spaces/tfrere/research-article-template)**
38
+ 2. Click **"Duplicate this Space"**
39
+ 3. Clone your new repository:
40
+ ```bash
41
+ git clone git@hf.co:spaces/<your-username>/<your-space>
42
+ cd <your-space>
43
+ ```
44
+
45
+ ### Option 2: Clone Directly
46
+
47
+ ```bash
48
+ git clone https://github.com/tfrere/research-article-template.git
49
+ cd research-article-template
50
+ ```
51
+
52
+ ### Installation
53
+
54
+ ```bash
55
+ # Install Node.js 20+ (use nvm for version management)
56
+ nvm install 20
57
+ nvm use 20
58
+
59
+ # Install Git LFS and pull assets
60
+ git lfs install
61
+ git lfs pull
62
+
63
+ # Install dependencies
64
+ cd app
65
+ npm install
66
+
67
+ # Start development server
68
+ npm run dev
69
+ ```
70
+
71
+ Visit `http://localhost:4321` to see your site!
72
+
73
+ ## 🎯 Who This Is For
74
+
75
+ - **Scientists** writing modern, web-native research papers
76
+ - **Educators** creating interactive, explorable lessons
77
+ - **Researchers** who want to focus on ideas, not infrastructure
78
+ - **Anyone** who values clear, engaging technical communication
79
+
80
+ ## 🌟 Inspired by Distill
81
+
82
+ This template carries forward the spirit of [Distill](https://distill.pub/) (2016–2021), pushing interactive scientific writing even further with:
83
+ - Accessible, high-quality explanations
84
+ - Reproducible, production-ready demos
85
+ - Modern web technologies and best practices
86
+
87
+ ## 🤝 Contributing
88
+
89
+ We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
90
+
91
+ ### Ways to Contribute
92
+
93
+ - **Report bugs** - Open an issue with detailed information
94
+ - **Suggest features** - Share ideas for improvements
95
+ - **Improve documentation** - Help others get started
96
+ - **Submit code** - Fix bugs or add features
97
+ - **Join discussions** - Share feedback and ideas
98
+
99
+ ## 📄 License
100
+
101
+ This project is licensed under the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).
102
+
103
+ - **Diagrams and text**: CC-BY 4.0
104
+ - **Source code**: Available on [Hugging Face](https://huggingface.co/spaces/tfrere/research-article-template)
105
+ - **Third-party figures**: Excluded and marked in captions
106
+
107
+ ## 🙏 Acknowledgments
108
+
109
+ - Inspired by [Distill](https://distill.pub/) and the interactive scientific writing movement
110
+ - Built with [Astro](https://astro.build/), [MDX](https://mdxjs.com/), and modern web technologies
111
+ - Community feedback and contributions from researchers worldwide
112
+
113
+ ## 📞 Support
114
+
115
+ - **[Community Discussions](https://huggingface.co/spaces/tfrere/research-article-template/discussions)** - Ask questions and share ideas
116
+ - **[Report Issues](https://huggingface.co/spaces/tfrere/research-article-template/discussions?status=open&type=issue)** - Bug reports and feature requests
117
+ - **Contact**: [@tfrere](https://huggingface.co/tfrere) on Hugging Face
118
+
119
+ ---
120
+
121
+ **Made with ❤️ for the scientific community**
app/astro.config.mjs ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig } from 'astro/config';
2
+ import mdx from '@astrojs/mdx';
3
+ import svelte from '@astrojs/svelte';
4
+ import mermaid from 'astro-mermaid';
5
+ import compressor from 'astro-compressor';
6
+ import remarkMath from 'remark-math';
7
+ import rehypeKatex from 'rehype-katex';
8
+ import remarkFootnotes from 'remark-footnotes';
9
+ import rehypeSlug from 'rehype-slug';
10
+ import rehypeAutolinkHeadings from 'rehype-autolink-headings';
11
+ import rehypeCitation from 'rehype-citation';
12
+ import rehypeCodeCopy from './plugins/rehype/code-copy.mjs';
13
+ import rehypeReferencesAndFootnotes from './plugins/rehype/post-citation.mjs';
14
+ import remarkIgnoreCitationsInCode from './plugins/remark/ignore-citations-in-code.mjs';
15
+ import remarkDirective from 'remark-directive';
16
+ import remarkOutputContainer from './plugins/remark/output-container.mjs';
17
+ import rehypeRestoreAtInCode from './plugins/rehype/restore-at-in-code.mjs';
18
+ import rehypeWrapTables from './plugins/rehype/wrap-tables.mjs';
19
+ import rehypeWrapOutput from './plugins/rehype/wrap-outputs.mjs';
20
+ // Built-in Shiki (dual themes) — no rehype-pretty-code
21
+
22
+ // Plugins moved to app/plugins/*
23
+
24
+ export default defineConfig({
25
+ output: 'static',
26
+ integrations: [
27
+ mermaid({ theme: 'forest', autoTheme: true }),
28
+ mdx(),
29
+ svelte(),
30
+ // Precompress output with Gzip only (Brotli disabled due to server module mismatch)
31
+ compressor({ brotli: false, gzip: true })
32
+ ],
33
+ devToolbar: {
34
+ enabled: false
35
+ },
36
+ markdown: {
37
+ shikiConfig: {
38
+ themes: {
39
+ light: 'github-light',
40
+ dark: 'github-dark'
41
+ },
42
+ defaultColor: false,
43
+ wrap: false,
44
+ langAlias: {
45
+ // Map MDX fences to TSX for better JSX tokenization
46
+ mdx: 'tsx'
47
+ }
48
+ },
49
+ remarkPlugins: [
50
+ remarkIgnoreCitationsInCode,
51
+ remarkMath,
52
+ [remarkFootnotes, { inlineNotes: true }],
53
+ remarkDirective,
54
+ remarkOutputContainer
55
+ ],
56
+ rehypePlugins: [
57
+ rehypeSlug,
58
+ [rehypeAutolinkHeadings, { behavior: 'wrap' }],
59
+ [rehypeKatex, {
60
+ trust: true,
61
+ }],
62
+ [rehypeCitation, {
63
+ bibliography: 'src/content/bibliography.bib',
64
+ linkCitations: true,
65
+ csl: "apa",
66
+ }],
67
+ rehypeReferencesAndFootnotes,
68
+ rehypeRestoreAtInCode,
69
+ rehypeCodeCopy,
70
+ rehypeWrapOutput,
71
+ rehypeWrapTables
72
+ ]
73
+ }
74
+ });
75
+
76
+
app/package-lock.json ADDED
Binary file (450 kB). View file
 
app/plugins/rehype/code-copy.mjs ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Minimal rehype plugin to wrap code blocks with a copy button
2
+ // Exported as a standalone module to keep astro.config.mjs lean
3
+ export default function rehypeCodeCopy() {
4
+ return (tree) => {
5
+ // Walk the tree; lightweight visitor to find <pre><code>
6
+ const visit = (node, parent) => {
7
+ if (!node || typeof node !== 'object') return;
8
+ const children = Array.isArray(node.children) ? node.children : [];
9
+ if (node.tagName === 'pre' && children.some(c => c.tagName === 'code')) {
10
+ // Find code child
11
+ const code = children.find(c => c.tagName === 'code');
12
+ // Determine if single-line block: prefer Shiki lines, then text content
13
+ const countLinesFromShiki = () => {
14
+ const isLineEl = (el) => el && el.type === 'element' && el.tagName === 'span' && Array.isArray(el.properties?.className) && el.properties.className.includes('line');
15
+ const hasNonWhitespaceText = (node) => {
16
+ if (!node) return false;
17
+ if (node.type === 'text') return /\S/.test(String(node.value || ''));
18
+ const kids = Array.isArray(node.children) ? node.children : [];
19
+ return kids.some(hasNonWhitespaceText);
20
+ };
21
+ const collectLines = (node, acc) => {
22
+ if (!node || typeof node !== 'object') return;
23
+ if (isLineEl(node)) acc.push(node);
24
+ const kids = Array.isArray(node.children) ? node.children : [];
25
+ kids.forEach((k) => collectLines(k, acc));
26
+ };
27
+ const lines = [];
28
+ collectLines(code, lines);
29
+ const nonEmpty = lines.filter((ln) => hasNonWhitespaceText(ln)).length;
30
+ return nonEmpty || 0;
31
+ };
32
+ const countLinesFromText = () => {
33
+ // Parse raw text content of the <code> node including nested spans
34
+ const extractText = (node) => {
35
+ if (!node) return '';
36
+ if (node.type === 'text') return String(node.value || '');
37
+ const kids = Array.isArray(node.children) ? node.children : [];
38
+ return kids.map(extractText).join('');
39
+ };
40
+ const raw = extractText(code);
41
+ if (!raw || !/\S/.test(raw)) return 0;
42
+ return raw.split('\n').filter(line => /\S/.test(line)).length;
43
+ };
44
+ const lines = countLinesFromShiki() || countLinesFromText();
45
+ const isSingleLine = lines <= 1;
46
+ // Also treat code blocks shorter than a threshold as single-line (defensive)
47
+ if (!isSingleLine) {
48
+ const approxChars = (() => {
49
+ const extract = (n) => Array.isArray(n?.children) ? n.children.map(extract).join('') : (n?.type === 'text' ? String(n.value||'') : '');
50
+ return extract(code).length;
51
+ })();
52
+ if (approxChars < 6) {
53
+ node.__forceSingle = true;
54
+ }
55
+ }
56
+ // Replace <pre> with wrapper div.code-card containing button + pre
57
+ const wrapper = {
58
+ type: 'element',
59
+ tagName: 'div',
60
+ properties: { className: ['code-card'].concat((isSingleLine || node.__forceSingle) ? ['no-copy'] : []) },
61
+ children: (isSingleLine || node.__forceSingle) ? [ node ] : [
62
+ {
63
+ type: 'element',
64
+ tagName: 'button',
65
+ properties: { className: ['code-copy', 'button--ghost'], type: 'button', 'aria-label': 'Copy code' },
66
+ children: [
67
+ {
68
+ type: 'element',
69
+ tagName: 'svg',
70
+ properties: { viewBox: '0 0 24 24', 'aria-hidden': 'true', focusable: 'false' },
71
+ children: [
72
+ { type: 'element', tagName: 'path', properties: { d: 'M16 1H4c-1.1 0-2 .9-2 2v12h2V3h12V1zm3 4H8c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h11c1.1 0 2-.9 2-2V7c0-1.1-.9-2-2-2zm0 16H8V7h11v14z' }, children: [] }
73
+ ]
74
+ }
75
+ ]
76
+ },
77
+ node
78
+ ]
79
+ };
80
+ if (parent && Array.isArray(parent.children)) {
81
+ const idx = parent.children.indexOf(node);
82
+ if (idx !== -1) parent.children[idx] = wrapper;
83
+ }
84
+ return; // don't visit nested
85
+ }
86
+ children.forEach((c) => visit(c, node));
87
+ };
88
+ visit(tree, null);
89
+ };
90
+ }
91
+
92
+
93
+
94
+
app/plugins/rehype/post-citation.mjs ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // rehype plugin to post-process citations and footnotes at build-time
2
+ // - Normalizes the bibliography into <ol class="references"> with <li id="...">
3
+ // - Linkifies DOI/URL occurrences inside references
4
+ // - Appends back-reference links (↩ back: 1, 2, ...) from each reference to in-text citation anchors
5
+ // - Cleans up footnotes block (.footnotes)
6
+
7
+ export default function rehypeReferencesAndFootnotes() {
8
+ return (tree) => {
9
+ const isElement = (n) => n && typeof n === 'object' && n.type === 'element';
10
+ const getChildren = (n) => (Array.isArray(n?.children) ? n.children : []);
11
+
12
+ const walk = (node, parent, fn) => {
13
+ if (!node || typeof node !== 'object') return;
14
+ fn && fn(node, parent);
15
+ const kids = getChildren(node);
16
+ for (const child of kids) walk(child, node, fn);
17
+ };
18
+
19
+ const ensureArray = (v) => (Array.isArray(v) ? v : v != null ? [v] : []);
20
+
21
+ const hasClass = (el, name) => {
22
+ const cn = ensureArray(el?.properties?.className).map(String);
23
+ return cn.includes(name);
24
+ };
25
+
26
+ const setAttr = (el, key, val) => {
27
+ el.properties = el.properties || {};
28
+ if (val == null) delete el.properties[key];
29
+ else el.properties[key] = val;
30
+ };
31
+
32
+ const getAttr = (el, key) => (el?.properties ? el.properties[key] : undefined);
33
+
34
+ // Shared helpers for backlinks + backrefs block
35
+ const collectBacklinksForIdSet = (idSet, anchorPrefix) => {
36
+ const idToBacklinks = new Map();
37
+ const idToAnchorNodes = new Map();
38
+ if (!idSet || idSet.size === 0) return { idToBacklinks, idToAnchorNodes };
39
+ walk(tree, null, (node) => {
40
+ if (!isElement(node) || node.tagName !== 'a') return;
41
+ const href = String(getAttr(node, 'href') || '');
42
+ if (!href.startsWith('#')) return;
43
+ const id = href.slice(1);
44
+ if (!idSet.has(id)) return;
45
+ // Ensure a stable id
46
+ let anchorId = String(getAttr(node, 'id') || '');
47
+ if (!anchorId) {
48
+ const list = idToBacklinks.get(id) || [];
49
+ anchorId = `${anchorPrefix}-${id}-${list.length + 1}`;
50
+ setAttr(node, 'id', anchorId);
51
+ }
52
+ const list = idToBacklinks.get(id) || [];
53
+ list.push(anchorId);
54
+ idToBacklinks.set(id, list);
55
+ const nodes = idToAnchorNodes.get(id) || [];
56
+ nodes.push(node);
57
+ idToAnchorNodes.set(id, nodes);
58
+ });
59
+ return { idToBacklinks, idToAnchorNodes };
60
+ };
61
+
62
+ const createBackIcon = () => ({
63
+ type: 'element',
64
+ tagName: 'svg',
65
+ properties: {
66
+ className: ['back-icon'],
67
+ width: 12,
68
+ height: 12,
69
+ viewBox: '0 0 24 24',
70
+ fill: 'none',
71
+ stroke: 'currentColor',
72
+ 'stroke-width': 2,
73
+ 'stroke-linecap': 'round',
74
+ 'stroke-linejoin': 'round',
75
+ 'aria-hidden': 'true',
76
+ focusable: 'false'
77
+ },
78
+ children: [
79
+ { type: 'element', tagName: 'line', properties: { x1: 12, y1: 19, x2: 12, y2: 5 }, children: [] },
80
+ { type: 'element', tagName: 'polyline', properties: { points: '5 12 12 5 19 12' }, children: [] }
81
+ ]
82
+ });
83
+
84
+ const appendBackrefsBlock = (listElement, idToBacklinks, ariaLabel) => {
85
+ if (!listElement || !idToBacklinks || idToBacklinks.size === 0) return;
86
+ for (const li of getChildren(listElement)) {
87
+ if (!isElement(li) || li.tagName !== 'li') continue;
88
+ const id = String(getAttr(li, 'id') || '');
89
+ if (!id) continue;
90
+ const keys = idToBacklinks.get(id);
91
+ if (!keys || !keys.length) continue;
92
+ // Remove pre-existing .backrefs in this li to avoid duplicates
93
+ li.children = getChildren(li).filter((n) => !(isElement(n) && n.tagName === 'small' && hasClass(n, 'backrefs')));
94
+ const small = {
95
+ type: 'element',
96
+ tagName: 'small',
97
+ properties: { className: ['backrefs'] },
98
+ children: []
99
+ };
100
+ if (keys.length === 1) {
101
+ // Single backlink: just the icon wrapped in the anchor
102
+ const a = {
103
+ type: 'element',
104
+ tagName: 'a',
105
+ properties: { href: `#${keys[0]}`, 'aria-label': ariaLabel },
106
+ children: [ createBackIcon() ]
107
+ };
108
+ small.children.push(a);
109
+ } else {
110
+ // Multiple backlinks: icon + label + numbered links
111
+ small.children.push(createBackIcon());
112
+ small.children.push({ type: 'text', value: ' back: ' });
113
+ keys.forEach((backId, idx) => {
114
+ small.children.push({
115
+ type: 'element',
116
+ tagName: 'a',
117
+ properties: { href: `#${backId}`, 'aria-label': ariaLabel },
118
+ children: [ { type: 'text', value: String(idx + 1) } ]
119
+ });
120
+ if (idx < keys.length - 1) small.children.push({ type: 'text', value: ', ' });
121
+ });
122
+ }
123
+ li.children.push(small);
124
+ }
125
+ };
126
+ // Remove default back-reference anchors generated by remark-footnotes inside a footnote item
127
+ const getTextContent = (el) => {
128
+ if (!el) return '';
129
+ const stack = [el];
130
+ let out = '';
131
+ while (stack.length) {
132
+ const cur = stack.pop();
133
+ if (!cur) continue;
134
+ if (cur.type === 'text') out += String(cur.value || '');
135
+ const kids = getChildren(cur);
136
+ for (let i = kids.length - 1; i >= 0; i--) stack.push(kids[i]);
137
+ }
138
+ return out;
139
+ };
140
+
141
+ const removeFootnoteBackrefAnchors = (el) => {
142
+ if (!isElement(el)) return;
143
+ const kids = getChildren(el);
144
+ for (let i = kids.length - 1; i >= 0; i--) {
145
+ const child = kids[i];
146
+ if (isElement(child)) {
147
+ if (
148
+ child.tagName === 'a' && (
149
+ getAttr(child, 'data-footnote-backref') != null ||
150
+ hasClass(child, 'footnote-backref') ||
151
+ String(getAttr(child, 'role') || '').toLowerCase() === 'doc-backlink' ||
152
+ String(getAttr(child, 'aria-label') || '').toLowerCase().includes('back to content') ||
153
+ String(getAttr(child, 'href') || '').startsWith('#fnref') ||
154
+ // Fallback: text-based detection like "↩" or "↩2"
155
+ /^\s*↩\s*\d*\s*$/u.test(getTextContent(child))
156
+ )
157
+ ) {
158
+ // Remove the anchor
159
+ el.children.splice(i, 1);
160
+ continue;
161
+ }
162
+ // Recurse into element
163
+ removeFootnoteBackrefAnchors(child);
164
+ // If a wrapper like <sup> or <span> became empty, remove it
165
+ const becameKids = getChildren(child);
166
+ if ((child.tagName === 'sup' || child.tagName === 'span') && (!becameKids || becameKids.length === 0)) {
167
+ el.children.splice(i, 1);
168
+ }
169
+ }
170
+ }
171
+ };
172
+
173
+
174
+ const normDoiHref = (href) => {
175
+ if (!href) return href;
176
+ const DUP = /https?:\/\/(?:dx\.)?doi\.org\/(?:https?:\/\/(?:dx\.)?doi\.org\/)+/gi;
177
+ const ONE = /https?:\/\/(?:dx\.)?doi\.org\/(10\.[^\s<>"']+)/i;
178
+ href = String(href).replace(DUP, 'https://doi.org/');
179
+ const m = href.match(ONE);
180
+ return m ? `https://doi.org/${m[1]}` : href;
181
+ };
182
+
183
+ const DOI_BARE = /\b10\.[0-9]{4,9}\/[\-._;()\/:A-Z0-9]+\b/gi;
184
+ const URL_GEN = /\bhttps?:\/\/[^\s<>()"']+/gi;
185
+
186
+ const linkifyTextNode = (textNode) => {
187
+ const text = String(textNode.value || '');
188
+ let last = 0;
189
+ const parts = [];
190
+ const pushText = (s) => { if (s) parts.push({ type: 'text', value: s }); };
191
+
192
+ const matches = [];
193
+ // Collect URL matches
194
+ let m;
195
+ URL_GEN.lastIndex = 0;
196
+ while ((m = URL_GEN.exec(text)) !== null) {
197
+ matches.push({ type: 'url', start: m.index, end: URL_GEN.lastIndex, raw: m[0] });
198
+ }
199
+ // Collect DOI matches
200
+ DOI_BARE.lastIndex = 0;
201
+ while ((m = DOI_BARE.exec(text)) !== null) {
202
+ matches.push({ type: 'doi', start: m.index, end: DOI_BARE.lastIndex, raw: m[0] });
203
+ }
204
+ matches.sort((a, b) => a.start - b.start);
205
+
206
+ for (const match of matches) {
207
+ if (match.start < last) continue; // overlapping
208
+ pushText(text.slice(last, match.start));
209
+ if (match.type === 'url') {
210
+ const href = normDoiHref(match.raw);
211
+ const doiOne = href.match(/https?:\/\/(?:dx\.)?doi\.org\/(10\.[^\s<>"']+)/i);
212
+ const a = {
213
+ type: 'element',
214
+ tagName: 'a',
215
+ properties: { href, target: '_blank', rel: 'noopener noreferrer' },
216
+ children: [{ type: 'text', value: doiOne ? doiOne[1] : href }]
217
+ };
218
+ parts.push(a);
219
+ } else {
220
+ const href = `https://doi.org/${match.raw}`;
221
+ const a = {
222
+ type: 'element',
223
+ tagName: 'a',
224
+ properties: { href, target: '_blank', rel: 'noopener noreferrer' },
225
+ children: [{ type: 'text', value: match.raw }]
226
+ };
227
+ parts.push(a);
228
+ }
229
+ last = match.end;
230
+ }
231
+
232
+ pushText(text.slice(last));
233
+ return parts;
234
+ };
235
+
236
+ const linkifyInElement = (el) => {
237
+ const kids = getChildren(el);
238
+ for (let i = 0; i < kids.length; i++) {
239
+ const child = kids[i];
240
+ if (!child) continue;
241
+ if (child.type === 'text') {
242
+ const replacement = linkifyTextNode(child);
243
+ if (replacement.length === 1 && replacement[0].type === 'text') continue;
244
+ // Replace the single text node with multiple nodes
245
+ el.children.splice(i, 1, ...replacement);
246
+ i += replacement.length - 1;
247
+ } else if (isElement(child)) {
248
+ if (child.tagName === 'a') {
249
+ const href = normDoiHref(getAttr(child, 'href'));
250
+ setAttr(child, 'href', href);
251
+ const m = String(href || '').match(/https?:\/\/(?:dx\.)?doi\.org\/(10\.[^\s<>"']+)/i);
252
+ if (m && (!child.children || child.children.length === 0)) {
253
+ child.children = [{ type: 'text', value: m[1] }];
254
+ }
255
+ continue;
256
+ }
257
+ linkifyInElement(child);
258
+ }
259
+ }
260
+ // Deduplicate adjacent identical anchors
261
+ for (let i = 1; i < el.children.length; i++) {
262
+ const prev = el.children[i - 1];
263
+ const curr = el.children[i];
264
+ if (isElement(prev) && isElement(curr) && prev.tagName === 'a' && curr.tagName === 'a') {
265
+ const key = `${getAttr(prev, 'href') || ''}|${(prev.children?.[0]?.value) || ''}`;
266
+ const key2 = `${getAttr(curr, 'href') || ''}|${(curr.children?.[0]?.value) || ''}`;
267
+ if (key === key2) {
268
+ el.children.splice(i, 1);
269
+ i--;
270
+ }
271
+ }
272
+ }
273
+ };
274
+
275
+ // Find references container and normalize its list
276
+ const findReferencesRoot = () => {
277
+ let found = null;
278
+ walk(tree, null, (node) => {
279
+ if (found) return;
280
+ if (!isElement(node)) return;
281
+ const id = getAttr(node, 'id');
282
+ if (id === 'references' || hasClass(node, 'references') || hasClass(node, 'bibliography')) {
283
+ found = node;
284
+ }
285
+ });
286
+ return found;
287
+ };
288
+
289
+ const toOrderedList = (container) => {
290
+ // If there is already an <ol>, use it; otherwise convert common structures
291
+ let ol = getChildren(container).find((c) => isElement(c) && c.tagName === 'ol');
292
+ if (!ol) {
293
+ ol = { type: 'element', tagName: 'ol', properties: { className: ['references'] }, children: [] };
294
+ const candidates = getChildren(container).filter((n) => isElement(n));
295
+ if (candidates.length) {
296
+ for (const node of candidates) {
297
+ if (hasClass(node, 'csl-entry') || node.tagName === 'li' || node.tagName === 'p' || node.tagName === 'div') {
298
+ const li = { type: 'element', tagName: 'li', properties: {}, children: getChildren(node) };
299
+ if (getAttr(node, 'id')) setAttr(li, 'id', getAttr(node, 'id'));
300
+ ol.children.push(li);
301
+ }
302
+ }
303
+ }
304
+ // Replace container children by the new ol
305
+ container.children = [ol];
306
+ }
307
+ if (!hasClass(ol, 'references')) {
308
+ const cls = ensureArray(ol.properties?.className).map(String);
309
+ if (!cls.includes('references')) cls.push('references');
310
+ ol.properties = ol.properties || {};
311
+ ol.properties.className = cls;
312
+ }
313
+ return ol;
314
+ };
315
+
316
+ const refsRoot = findReferencesRoot();
317
+ let refsOl = null;
318
+ const refIdSet = new Set();
319
+ const refIdToExternalHref = new Map();
320
+
321
+ if (refsRoot) {
322
+ refsOl = toOrderedList(refsRoot);
323
+ // Collect item ids and linkify their content
324
+ for (const li of getChildren(refsOl)) {
325
+ if (!isElement(li) || li.tagName !== 'li') continue;
326
+ if (!getAttr(li, 'id')) {
327
+ // Try to find a nested element with id to promote
328
+ const nestedWithId = getChildren(li).find((n) => isElement(n) && getAttr(n, 'id'));
329
+ if (nestedWithId) setAttr(li, 'id', getAttr(nestedWithId, 'id'));
330
+ }
331
+ const id = getAttr(li, 'id');
332
+ if (id) refIdSet.add(String(id));
333
+ linkifyInElement(li);
334
+ // Record first external link href (e.g., DOI/URL) if present
335
+ if (id) {
336
+ let externalHref = null;
337
+ const stack = [li];
338
+ while (stack.length) {
339
+ const cur = stack.pop();
340
+ const kids = getChildren(cur);
341
+ for (const k of kids) {
342
+ if (isElement(k) && k.tagName === 'a') {
343
+ const href = String(getAttr(k, 'href') || '');
344
+ if (/^https?:\/\//i.test(href)) {
345
+ externalHref = href;
346
+ break;
347
+ }
348
+ }
349
+ if (isElement(k)) stack.push(k);
350
+ }
351
+ if (externalHref) break;
352
+ }
353
+ if (externalHref) refIdToExternalHref.set(String(id), externalHref);
354
+ }
355
+ }
356
+ setAttr(refsRoot, 'data-built-refs', '1');
357
+ }
358
+
359
+ // Collect in-text anchors that point to references ids
360
+ const { idToBacklinks: refIdToBacklinks, idToAnchorNodes: refIdToCitationAnchors } = collectBacklinksForIdSet(refIdSet, 'refctx');
361
+
362
+ // Append backlinks into references list items
363
+ appendBackrefsBlock(refsOl, refIdToBacklinks, 'Back to citation');
364
+
365
+ // Rewrite in-text citation anchors to external link when available
366
+ if (refIdToCitationAnchors.size > 0) {
367
+ for (const [id, anchors] of refIdToCitationAnchors.entries()) {
368
+ const ext = refIdToExternalHref.get(id);
369
+ if (!ext) continue;
370
+ for (const a of anchors) {
371
+ setAttr(a, 'data-ref-id', id);
372
+ setAttr(a, 'href', ext);
373
+ const existingTarget = getAttr(a, 'target');
374
+ if (!existingTarget) setAttr(a, 'target', '_blank');
375
+ const rel = String(getAttr(a, 'rel') || '');
376
+ const relSet = new Set(rel ? rel.split(/\s+/) : []);
377
+ relSet.add('noopener');
378
+ relSet.add('noreferrer');
379
+ setAttr(a, 'rel', Array.from(relSet).join(' '));
380
+ }
381
+ }
382
+ }
383
+
384
+ // Footnotes cleanup + backrefs harmonized with references
385
+ const cleanupFootnotes = () => {
386
+ let root = null;
387
+ walk(tree, null, (node) => {
388
+ if (!isElement(node)) return;
389
+ if (hasClass(node, 'footnotes')) root = node;
390
+ });
391
+ if (!root) return { root: null, ol: null, idSet: new Set() };
392
+ // Remove <hr> direct children
393
+ root.children = getChildren(root).filter((n) => !(isElement(n) && n.tagName === 'hr'));
394
+ // Ensure an <ol>
395
+ let ol = getChildren(root).find((c) => isElement(c) && c.tagName === 'ol');
396
+ if (!ol) {
397
+ ol = { type: 'element', tagName: 'ol', properties: {}, children: [] };
398
+ const items = getChildren(root).filter((n) => isElement(n) && (n.tagName === 'li' || hasClass(n, 'footnote') || n.tagName === 'p' || n.tagName === 'div'));
399
+ if (items.length) {
400
+ for (const it of items) {
401
+ const li = { type: 'element', tagName: 'li', properties: {}, children: getChildren(it) };
402
+ // Promote nested id if present (e.g., <p id="fn-1">)
403
+ const nestedWithId = getChildren(it).find((n) => isElement(n) && getAttr(n, 'id'));
404
+ if (nestedWithId) setAttr(li, 'id', getAttr(nestedWithId, 'id'));
405
+ ol.children.push(li);
406
+ }
407
+ }
408
+ root.children = [ol];
409
+ }
410
+ // For existing structures, try to promote ids from children when missing
411
+ for (const li of getChildren(ol)) {
412
+ if (!isElement(li) || li.tagName !== 'li') continue;
413
+ if (!getAttr(li, 'id')) {
414
+ const nestedWithId = getChildren(li).find((n) => isElement(n) && getAttr(n, 'id'));
415
+ if (nestedWithId) setAttr(li, 'id', getAttr(nestedWithId, 'id'));
416
+ }
417
+ // Remove default footnote backrefs anywhere inside (to avoid duplication)
418
+ removeFootnoteBackrefAnchors(li);
419
+ }
420
+ setAttr(root, 'data-built-footnotes', '1');
421
+ // Collect id set
422
+ const idSet = new Set();
423
+ for (const li of getChildren(ol)) {
424
+ if (!isElement(li) || li.tagName !== 'li') continue;
425
+ const id = getAttr(li, 'id');
426
+ if (id) idSet.add(String(id));
427
+ }
428
+ return { root, ol, idSet };
429
+ };
430
+
431
+ const { root: footRoot, ol: footOl, idSet: footIdSet } = cleanupFootnotes();
432
+
433
+ // Collect in-text anchors pointing to footnotes
434
+ const { idToBacklinks: footIdToBacklinks } = collectBacklinksForIdSet(footIdSet, 'footctx');
435
+
436
+ // Append backlinks into footnote list items (identical pattern to references)
437
+ appendBackrefsBlock(footOl, footIdToBacklinks, 'Back to footnote call');
438
+ };
439
+ }
440
+
441
+
app/plugins/rehype/restore-at-in-code.mjs ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Rehype plugin to restore '@' inside code nodes after rehype-citation ran
2
+ export default function rehypeRestoreAtInCode() {
3
+ return (tree) => {
4
+ const restoreInNode = (node) => {
5
+ if (!node || typeof node !== 'object') return;
6
+ const isText = node.type === 'text';
7
+ if (isText && typeof node.value === 'string' && node.value.includes('__AT_SENTINEL__')) {
8
+ node.value = node.value.replace(/__AT_SENTINEL__/g, '@');
9
+ }
10
+ const isCodeEl = node.type === 'element' && node.tagName === 'code';
11
+ const children = Array.isArray(node.children) ? node.children : [];
12
+ if (isCodeEl && children.length) {
13
+ children.forEach(restoreInNode);
14
+ return;
15
+ }
16
+ children.forEach(restoreInNode);
17
+ };
18
+ restoreInNode(tree);
19
+ };
20
+ }
21
+
22
+
app/plugins/rehype/wrap-outputs.mjs ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Wrap plain-text content inside <section class="code-outputs"> into a <pre>
2
+ export default function rehypeWrapOutput() {
3
+ return (tree) => {
4
+ const isWhitespace = (value) => typeof value === 'string' && !/\S/.test(value);
5
+ const extractText = (node) => {
6
+ if (!node) return '';
7
+ if (node.type === 'text') return String(node.value || '');
8
+ const kids = Array.isArray(node.children) ? node.children : [];
9
+ return kids.map(extractText).join('');
10
+ };
11
+ const visit = (node) => {
12
+ if (!node || typeof node !== 'object') return;
13
+ const children = Array.isArray(node.children) ? node.children : [];
14
+ if (node.type === 'element' && node.tagName === 'section') {
15
+ const className = node.properties?.className || [];
16
+ const classes = Array.isArray(className) ? className : [className].filter(Boolean);
17
+ if (classes.includes('code-output')) {
18
+ const meaningful = children.filter((c) => !(c.type === 'text' && isWhitespace(c.value)));
19
+ if (meaningful.length === 1) {
20
+ const only = meaningful[0];
21
+ const isPlainParagraph = only.type === 'element' && only.tagName === 'p' && (only.children || []).every((c) => c.type === 'text');
22
+ const isPlainText = only.type === 'text';
23
+ if (isPlainParagraph || isPlainText) {
24
+ const text = isPlainText ? String(only.value || '') : extractText(only);
25
+ node.children = [
26
+ { type: 'element', tagName: 'pre', properties: {}, children: [ { type: 'text', value: text } ] }
27
+ ];
28
+ }
29
+ }
30
+ }
31
+ }
32
+ children.forEach(visit);
33
+ };
34
+ visit(tree);
35
+ };
36
+ }
37
+
38
+
app/plugins/rehype/wrap-tables.mjs ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // rehype plugin: wrap bare <table> elements in a <div class="table-scroll"> container
2
+ // so that tables stay width:100% while enabling horizontal scroll when content overflows
3
+
4
+ export default function rehypeWrapTables() {
5
+ return (tree) => {
6
+ const isElement = (n) => n && typeof n === 'object' && n.type === 'element';
7
+ const getChildren = (n) => (Array.isArray(n?.children) ? n.children : []);
8
+
9
+ const walk = (node, parent, fn) => {
10
+ if (!node || typeof node !== 'object') return;
11
+ fn && fn(node, parent);
12
+ const kids = getChildren(node);
13
+ for (const child of kids) walk(child, node, fn);
14
+ };
15
+
16
+ const ensureArray = (v) => (Array.isArray(v) ? v : v != null ? [v] : []);
17
+ const hasClass = (el, name) => ensureArray(el?.properties?.className).map(String).includes(name);
18
+
19
+ const wrapTable = (tableNode, parent) => {
20
+ if (!parent || !Array.isArray(parent.children)) return;
21
+ // Don't double-wrap if already inside .table-scroll
22
+ if (parent.tagName === 'div' && hasClass(parent, 'table-scroll')) return;
23
+
24
+ const wrapper = {
25
+ type: 'element',
26
+ tagName: 'div',
27
+ properties: { className: ['table-scroll'] },
28
+ children: [tableNode]
29
+ };
30
+
31
+ const idx = parent.children.indexOf(tableNode);
32
+ if (idx >= 0) parent.children.splice(idx, 1, wrapper);
33
+ };
34
+
35
+ walk(tree, null, (node, parent) => {
36
+ if (!isElement(node)) return;
37
+ if (node.tagName !== 'table') return;
38
+ wrapTable(node, parent);
39
+ });
40
+ };
41
+ }
42
+
43
+
app/plugins/remark/ignore-citations-in-code.mjs ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Remark plugin to ignore citations inside code (block and inline)
2
+ export default function remarkIgnoreCitationsInCode() {
3
+ return (tree) => {
4
+ const visit = (node) => {
5
+ if (!node || typeof node !== 'object') return;
6
+ const type = node.type;
7
+ if (type === 'code' || type === 'inlineCode') {
8
+ if (typeof node.value === 'string' && node.value.includes('@')) {
9
+ // Use a sentinel to avoid rehype-citation, will be restored later in rehype
10
+ node.value = node.value.replace(/@/g, '__AT_SENTINEL__');
11
+ }
12
+ return; // do not traverse into code
13
+ }
14
+ const children = Array.isArray(node.children) ? node.children : [];
15
+ children.forEach(visit);
16
+ };
17
+ visit(tree);
18
+ };
19
+ }
20
+
21
+
app/plugins/remark/output-container.mjs ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Transform `:::output ... :::` into a <section class="code-output"> wrapper
2
+ // Requires remark-directive to be applied before this plugin
3
+
4
+ export default function remarkOutputContainer() {
5
+ return (tree) => {
6
+ const visit = (node) => {
7
+ if (!node || typeof node !== 'object') return;
8
+
9
+ if (node.type === 'containerDirective' && node.name === 'output') {
10
+ node.data = node.data || {};
11
+ node.data.hName = 'section';
12
+ node.data.hProperties = { className: ['code-output'] };
13
+ }
14
+
15
+ const children = Array.isArray(node.children) ? node.children : [];
16
+ for (const child of children) visit(child);
17
+ };
18
+
19
+ visit(tree);
20
+ };
21
+ }
22
+
23
+
app/plugins/remark/outputs-container.mjs ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Transform `:::outputs ... :::` into a <section class="code-outputs"> wrapper
2
+ // Requires remark-directive to be applied before this plugin
3
+
4
+ export default function remarkOutputsContainer() {
5
+ return (tree) => {
6
+ const visit = (node) => {
7
+ if (!node || typeof node !== 'object') return;
8
+
9
+ if (node.type === 'containerDirective' && node.name === 'outputs') {
10
+ node.data = node.data || {};
11
+ node.data.hName = 'section';
12
+ node.data.hProperties = { className: ['code-outputs'] };
13
+ }
14
+
15
+ const children = Array.isArray(node.children) ? node.children : [];
16
+ for (const child of children) visit(child);
17
+ };
18
+
19
+ visit(tree);
20
+ };
21
+ }
22
+
23
+
app/postcss.config.mjs ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // PostCSS config enabling Custom Media Queries
2
+ // Allows usage of: @media (--bp-content-collapse) { ... }
3
+
4
+ import postcssCustomMedia from 'postcss-custom-media';
5
+ import postcssPresetEnv from 'postcss-preset-env';
6
+
7
+ export default {
8
+ plugins: [
9
+ postcssCustomMedia(),
10
+ postcssPresetEnv({
11
+ stage: 0
12
+ })
13
+ ]
14
+ };
app/public/data ADDED
@@ -0,0 +1 @@
 
 
1
+ ../src/content/assets/data
app/public/scripts/color-palettes.js ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Global color palettes generator and watcher
2
+ // - Observes CSS variable --primary-color and theme changes
3
+ // - Generates categorical, sequential, and diverging palettes (OKLCH/OKLab)
4
+ // - Exposes results as CSS variables on :root
5
+ // - Supports variable color counts per palette via CSS vars
6
+ // - Dispatches a 'palettes:updated' CustomEvent after each update
7
+
8
+ (() => {
9
+ const MODE = { cssRoot: document.documentElement };
10
+
11
+ const getCssVar = (name) => {
12
+ try { return getComputedStyle(MODE.cssRoot).getPropertyValue(name).trim(); } catch { return ''; }
13
+ };
14
+ const getIntFromCssVar = (name, fallback) => {
15
+ const raw = getCssVar(name);
16
+ if (!raw) return fallback;
17
+ const v = parseInt(String(raw), 10);
18
+ if (Number.isNaN(v)) return fallback;
19
+ return v;
20
+ };
21
+ const clamp = (n, min, max) => Math.max(min, Math.min(max, n));
22
+
23
+ // Color math (OKLab/OKLCH)
24
+ const srgbToLinear = (u) => (u <= 0.04045 ? u / 12.92 : Math.pow((u + 0.055) / 1.055, 2.4));
25
+ const linearToSrgb = (u) => (u <= 0.0031308 ? 12.92 * u : 1.055 * Math.pow(Math.max(0, u), 1 / 2.4) - 0.055);
26
+ const rgbToOklab = (r, g, b) => {
27
+ const rl = srgbToLinear(r), gl = srgbToLinear(g), bl = srgbToLinear(b);
28
+ const l = Math.cbrt(0.4122214708 * rl + 0.5363325363 * gl + 0.0514459929 * bl);
29
+ const m = Math.cbrt(0.2119034982 * rl + 0.6806995451 * gl + 0.1073969566 * bl);
30
+ const s = Math.cbrt(0.0883024619 * rl + 0.2817188376 * gl + 0.6299787005 * bl);
31
+ const L = 0.2104542553 * l + 0.7936177850 * m - 0.0040720468 * s;
32
+ const a = 1.9779984951 * l - 2.4285922050 * m + 0.4505937099 * s;
33
+ const b2 = 0.0259040371 * l + 0.7827717662 * m - 0.8086757660 * s;
34
+ return { L, a, b: b2 };
35
+ };
36
+ const oklabToRgb = (L, a, b) => {
37
+ const l_ = L + 0.3963377774 * a + 0.2158037573 * b;
38
+ const m_ = L - 0.1055613458 * a - 0.0638541728 * b;
39
+ const s_ = L - 0.0894841775 * a - 1.2914855480 * b;
40
+ const l = l_ * l_ * l_;
41
+ const m = m_ * m_ * m_;
42
+ const s = s_ * s_ * s_;
43
+ const r = linearToSrgb(+4.0767416621 * l - 3.3077115913 * m + 0.2309699292 * s);
44
+ const g = linearToSrgb(-1.2684380046 * l + 2.6097574011 * m - 0.3413193965 * s);
45
+ const b3 = linearToSrgb(-0.0041960863 * l - 0.7034186147 * m + 1.7076147010 * s);
46
+ return { r, g, b: b3 };
47
+ };
48
+ const oklchToOklab = (L, C, hDeg) => { const h = (hDeg * Math.PI) / 180; return { L, a: C * Math.cos(h), b: C * Math.sin(h) }; };
49
+ const oklabToOklch = (L, a, b) => { const C = Math.sqrt(a * a + b * b); let h = Math.atan2(b, a) * 180 / Math.PI; if (h < 0) h += 360; return { L, C, h }; };
50
+ const clamp01 = (x) => Math.min(1, Math.max(0, x));
51
+ const isInGamut = ({ r, g, b }) => r >= 0 && r <= 1 && g >= 0 && g <= 1 && b >= 0 && b <= 1;
52
+ const toHex = ({ r, g, b }) => {
53
+ const R = Math.round(clamp01(r) * 255), G = Math.round(clamp01(g) * 255), B = Math.round(clamp01(b) * 255);
54
+ const h = (n) => n.toString(16).padStart(2, '0');
55
+ return `#${h(R)}${h(G)}${h(B)}`.toUpperCase();
56
+ };
57
+ const oklchToHexSafe = (L, C, h) => { let c = C; for (let i = 0; i < 12; i++) { const { a, b } = oklchToOklab(L, c, h); const rgb = oklabToRgb(L, a, b); if (isInGamut(rgb)) return toHex(rgb); c = Math.max(0, c - 0.02); } return toHex(oklabToRgb(L, 0, 0)); };
58
+ const parseCssColorToRgb = (css) => { try { const el = document.createElement('span'); el.style.color = css; document.body.appendChild(el); const cs = getComputedStyle(el).color; document.body.removeChild(el); const m = cs.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/i); if (!m) return null; return { r: Number(m[1]) / 255, g: Number(m[2]) / 255, b: Number(m[3]) / 255 }; } catch { return null; } };
59
+
60
+ // Get primary color in OKLCH format to preserve precision
61
+ const getPrimaryOKLCH = () => {
62
+ const css = getCssVar('--primary-color');
63
+ if (!css) return null;
64
+
65
+ // For OKLCH colors, return the exact values without conversion
66
+ if (css.includes('oklch')) {
67
+ const oklchMatch = css.match(/oklch\(([^)]+)\)/);
68
+ if (oklchMatch) {
69
+ const values = oklchMatch[1].split(/\s+/).map(v => parseFloat(v.trim()));
70
+ if (values.length >= 3) {
71
+ const [L, C, h] = values;
72
+ return { L, C, h };
73
+ }
74
+ }
75
+ }
76
+
77
+ // For non-OKLCH colors, convert to OKLCH for consistency
78
+ const rgb = parseCssColorToRgb(css);
79
+ if (rgb) {
80
+ const { L, a, b } = rgbToOklab(rgb.r, rgb.g, rgb.b);
81
+ const { C, h } = oklabToOklch(L, a, b);
82
+ return { L, C, h };
83
+ }
84
+ return null;
85
+ };
86
+
87
+ // Keep getPrimaryHex for backward compatibility, but now it converts from OKLCH
88
+ const getPrimaryHex = () => {
89
+ const oklch = getPrimaryOKLCH();
90
+ if (!oklch) return null;
91
+
92
+ const { a, b } = oklchToOklab(oklch.L, oklch.C, oklch.h);
93
+ const rgb = oklabToRgb(oklch.L, a, b);
94
+ return toHex(rgb);
95
+ };
96
+ // No count management via CSS anymore; counts are passed directly to the API
97
+
98
+ const generators = {
99
+ categorical: (baseOKLCH, count) => {
100
+ const { L, C, h } = baseOKLCH;
101
+ const L0 = Math.min(0.85, Math.max(0.4, L));
102
+ const C0 = Math.min(0.35, Math.max(0.1, C || 0.2));
103
+ const total = Math.max(1, Math.min(12, count || 8));
104
+ const hueStep = 360 / total;
105
+ const results = [];
106
+ for (let i = 0; i < total; i++) {
107
+ const hDeg = (h + i * hueStep) % 360;
108
+ const lVar = ((i % 3) - 1) * 0.04;
109
+ results.push(oklchToHexSafe(Math.max(0.4, Math.min(0.85, L0 + lVar)), C0, hDeg));
110
+ }
111
+ return results;
112
+ },
113
+ sequential: (baseOKLCH, count) => {
114
+ const { L, C, h } = baseOKLCH;
115
+ const total = Math.max(1, Math.min(12, count || 8));
116
+ const startL = Math.max(0.25, L - 0.18);
117
+ const endL = Math.min(0.92, L + 0.18);
118
+ const cBase = Math.min(0.33, Math.max(0.08, C * 0.9 + 0.06));
119
+ const out = [];
120
+ for (let i = 0; i < total; i++) {
121
+ const t = total === 1 ? 0 : i / (total - 1);
122
+ const lNow = startL * (1 - t) + endL * t;
123
+ const cNow = cBase * (0.85 + 0.15 * (1 - Math.abs(0.5 - t) * 2));
124
+ out.push(oklchToHexSafe(lNow, cNow, h));
125
+ }
126
+ return out;
127
+ },
128
+ diverging: (baseOKLCH, count) => {
129
+ const { L, C, h } = baseOKLCH;
130
+ const total = Math.max(1, Math.min(12, count || 8));
131
+
132
+ // Left endpoint: EXACT primary color (no darkening)
133
+ const leftLab = oklchToOklab(L, C, h);
134
+ // Right endpoint: complement with same L and similar C (clamped safe)
135
+ const compH = (h + 180) % 360;
136
+ const cSafe = Math.min(0.35, Math.max(0.08, C));
137
+ const rightLab = oklchToOklab(L, cSafe, compH);
138
+ const whiteLab = { L: 0.98, a: 0, b: 0 }; // center near‑white
139
+
140
+ const hexFromOKLab = (L, a, b) => toHex(oklabToRgb(L, a, b));
141
+ const lerp = (a, b, t) => a + (b - a) * t;
142
+ const lerpOKLabHex = (A, B, t) => hexFromOKLab(lerp(A.L, B.L, t), lerp(A.a, B.a, t), lerp(A.b, B.b, t));
143
+
144
+ const out = [];
145
+ if (total % 2 === 1) {
146
+ const nSide = (total - 1) >> 1; // items on each side
147
+ // Left side: include left endpoint exactly at index 0
148
+ for (let i = 0; i < nSide; i++) {
149
+ const t = nSide <= 1 ? 0 : (i / (nSide - 1)); // 0 .. 1
150
+ // Move from leftLab to a value close (but not equal) to white; ensure last before center is lighter
151
+ const tt = t * 0.9; // keep some distance from pure white before center
152
+ out.push(lerpOKLabHex(leftLab, whiteLab, tt));
153
+ }
154
+ // Center
155
+ out.push(hexFromOKLab(whiteLab.L, whiteLab.a, whiteLab.b));
156
+ // Right side: start near white and end EXACTLY at rightLab
157
+ for (let i = 0; i < nSide; i++) {
158
+ const t = nSide <= 1 ? 1 : ((i + 1) / nSide); // (1/n)..1
159
+ const tt = Math.max(0.1, t); // avoid starting at pure white
160
+ out.push(lerpOKLabHex(whiteLab, rightLab, tt));
161
+ }
162
+ // Ensure first and last are exact endpoints
163
+ if (out.length) { out[0] = hexFromOKLab(leftLab.L, leftLab.a, leftLab.b); out[out.length - 1] = hexFromOKLab(rightLab.L, rightLab.a, rightLab.b); }
164
+ } else {
165
+ const nSide = total >> 1;
166
+ // Left half including left endpoint, approaching white but not reaching it
167
+ for (let i = 0; i < nSide; i++) {
168
+ const t = nSide <= 1 ? 0 : (i / (nSide - 1)); // 0 .. 1
169
+ const tt = t * 0.9;
170
+ out.push(lerpOKLabHex(leftLab, whiteLab, tt));
171
+ }
172
+ // Right half: mirror from near white to exact right endpoint
173
+ for (let i = 0; i < nSide; i++) {
174
+ const t = nSide <= 1 ? 1 : ((i + 1) / nSide); // (1/n)..1
175
+ const tt = Math.max(0.1, t);
176
+ out.push(lerpOKLabHex(whiteLab, rightLab, tt));
177
+ }
178
+ if (out.length) { out[0] = hexFromOKLab(leftLab.L, leftLab.a, leftLab.b); out[out.length - 1] = hexFromOKLab(rightLab.L, rightLab.a, rightLab.b); }
179
+ }
180
+ return out;
181
+ }
182
+ };
183
+
184
+ let lastSignature = '';
185
+
186
+ const updatePalettes = () => {
187
+ const primaryOKLCH = getPrimaryOKLCH();
188
+ const primaryHex = getPrimaryHex();
189
+ const signature = `${primaryOKLCH?.L},${primaryOKLCH?.C},${primaryOKLCH?.h}`;
190
+ if (signature === lastSignature) return;
191
+ lastSignature = signature;
192
+ try { document.dispatchEvent(new CustomEvent('palettes:updated', { detail: { primary: primaryHex, primaryOKLCH } })); } catch { }
193
+ };
194
+
195
+ const bootstrap = () => {
196
+ // Initial setup - only run once on page load
197
+ updatePalettes();
198
+
199
+ // Observer will handle all subsequent changes
200
+ const mo = new MutationObserver(() => updatePalettes());
201
+ mo.observe(MODE.cssRoot, { attributes: true, attributeFilter: ['style', 'data-theme'] });
202
+
203
+ // Utility: choose high-contrast (or softened) text style against an arbitrary background color
204
+ const pickTextStyleForBackground = (bgCss, opts = {}) => {
205
+ const cssRoot = document.documentElement;
206
+ const getCssVar = (name) => {
207
+ try { return getComputedStyle(cssRoot).getPropertyValue(name).trim(); } catch { return ''; }
208
+ };
209
+ const resolveCssToRgb01 = (css) => {
210
+ const rgb = parseCssColorToRgb(css);
211
+ if (!rgb) return null;
212
+ return rgb; // already 0..1
213
+ };
214
+ const mixRgb01 = (a, b, t) => ({ r: a.r * (1 - t) + b.r * t, g: a.g * (1 - t) + b.g * t, b: a.b * (1 - t) + b.b * t });
215
+ const relLum = (rgb) => {
216
+ const f = (u) => srgbToLinear(u);
217
+ return 0.2126 * f(rgb.r) + 0.7152 * f(rgb.g) + 0.0722 * f(rgb.b);
218
+ };
219
+ const contrast = (fg, bg) => {
220
+ const L1 = relLum(fg), L2 = relLum(bg); const a = Math.max(L1, L2), b = Math.min(L1, L2);
221
+ return (a + 0.05) / (b + 0.05);
222
+ };
223
+ try {
224
+ const bg = resolveCssToRgb01(bgCss);
225
+ if (!bg) return { fill: getCssVar('--text-color') || '#000', stroke: 'var(--transparent-page-contrast)', strokeWidth: 1 };
226
+ const candidatesCss = [getCssVar('--text-color') || '#111', getCssVar('--on-primary') || '#0f1115', '#000', '#fff'];
227
+ const candidates = candidatesCss
228
+ .map(css => ({ css, rgb: resolveCssToRgb01(css) }))
229
+ .filter(x => !!x.rgb);
230
+ // Pick the max contrast
231
+ let best = candidates[0]; let bestCR = contrast(best.rgb, bg);
232
+ for (let i = 1; i < candidates.length; i++) {
233
+ const cr = contrast(candidates[i].rgb, bg);
234
+ if (cr > bestCR) { best = candidates[i]; bestCR = cr; }
235
+ }
236
+ // Optional softening via blend factor (0..1), blending towards muted color
237
+ const blend = Math.min(1, Math.max(0, Number(opts.blend || 0)));
238
+ let finalRgb = best.rgb;
239
+ if (blend > 0) {
240
+ const mutedCss = getCssVar('--muted-color') || (getCssVar('--text-color') || '#111');
241
+ const mutedRgb = resolveCssToRgb01(mutedCss) || best.rgb;
242
+ finalRgb = mixRgb01(best.rgb, mutedRgb, blend);
243
+ }
244
+ const haloStrength = Math.min(1, Math.max(0, Number(opts.haloStrength == null ? 0.5 : opts.haloStrength)));
245
+ const stroke = (best.css === '#000' || best.css.toLowerCase() === 'black') ? `rgba(255,255,255,${0.30 + 0.40 * haloStrength})` : `rgba(0,0,0,${0.30 + 0.30 * haloStrength})`;
246
+ return { fill: toHex(finalRgb), stroke, strokeWidth: (opts.haloWidth == null ? 1 : Number(opts.haloWidth)) };
247
+ } catch {
248
+ return { fill: getCssVar('--text-color') || '#000', stroke: 'var(--transparent-page-contrast)', strokeWidth: 1 };
249
+ }
250
+ };
251
+ window.ColorPalettes = {
252
+ refresh: updatePalettes,
253
+ notify: () => { try { const primaryOKLCH = getPrimaryOKLCH(); const primaryHex = getPrimaryHex(); document.dispatchEvent(new CustomEvent('palettes:updated', { detail: { primary: primaryHex, primaryOKLCH } })); } catch { } },
254
+ getPrimary: () => getPrimaryHex(),
255
+ getPrimaryOKLCH: () => getPrimaryOKLCH(),
256
+ getColors: (key, count = 6) => {
257
+ const primaryOKLCH = getPrimaryOKLCH();
258
+ if (!primaryOKLCH) return [];
259
+ const total = Math.max(1, Math.min(12, Number(count) || 6));
260
+ if (key === 'categorical') return generators.categorical(primaryOKLCH, total);
261
+ if (key === 'sequential') return generators.sequential(primaryOKLCH, total);
262
+ if (key === 'diverging') return generators.diverging(primaryOKLCH, total);
263
+ return [];
264
+ },
265
+ getTextStyleForBackground: (bgCss, opts) => pickTextStyleForBackground(bgCss, opts || {}),
266
+ chooseReadableText: (bgCss, opts) => pickTextStyleForBackground(bgCss, opts || {})
267
+ };
268
+ };
269
+
270
+ if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
271
+ else bootstrap();
272
+ })();
273
+
274
+
app/scripts/export-latex.mjs ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+ import { spawn } from 'node:child_process';
3
+ import { promises as fs } from 'node:fs';
4
+ import { resolve, dirname, basename, extname } from 'node:path';
5
+ import process from 'node:process';
6
+
7
+ async function run(command, args = [], options = {}) {
8
+ return new Promise((resolvePromise, reject) => {
9
+ const child = spawn(command, args, { stdio: 'inherit', shell: false, ...options });
10
+ child.on('error', reject);
11
+ child.on('exit', (code) => {
12
+ if (code === 0) resolvePromise(undefined);
13
+ else reject(new Error(`${command} ${args.join(' ')} exited with code ${code}`));
14
+ });
15
+ });
16
+ }
17
+
18
+ function parseArgs(argv) {
19
+ const out = {};
20
+ for (const arg of argv.slice(2)) {
21
+ if (!arg.startsWith('--')) continue;
22
+ const [k, v] = arg.replace(/^--/, '').split('=');
23
+ out[k] = v === undefined ? true : v;
24
+ }
25
+ return out;
26
+ }
27
+
28
+ function slugify(text) {
29
+ return String(text || '')
30
+ .normalize('NFKD')
31
+ .replace(/\p{Diacritic}+/gu, '')
32
+ .toLowerCase()
33
+ .replace(/[^a-z0-9]+/g, '-')
34
+ .replace(/^-+|-+$/g, '')
35
+ .slice(0, 120) || 'article';
36
+ }
37
+
38
+ async function checkPandocInstalled() {
39
+ try {
40
+ await run('pandoc', ['--version'], { stdio: 'pipe' });
41
+ return true;
42
+ } catch {
43
+ return false;
44
+ }
45
+ }
46
+
47
+ async function readMdxFile(filePath) {
48
+ try {
49
+ const content = await fs.readFile(filePath, 'utf-8');
50
+ return content;
51
+ } catch (error) {
52
+ console.warn(`Warning: Could not read ${filePath}:`, error.message);
53
+ return '';
54
+ }
55
+ }
56
+
57
+ function extractFrontmatter(content) {
58
+ const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n/);
59
+ if (!frontmatterMatch) return { frontmatter: {}, content };
60
+
61
+ const frontmatterText = frontmatterMatch[1];
62
+ const contentWithoutFrontmatter = content.replace(frontmatterMatch[0], '');
63
+
64
+ // Simple YAML parsing for basic fields
65
+ const frontmatter = {};
66
+ const lines = frontmatterText.split('\n');
67
+ let currentKey = null;
68
+ let currentValue = '';
69
+
70
+ for (const line of lines) {
71
+ const trimmed = line.trim();
72
+ if (trimmed.includes(':') && !trimmed.startsWith('-')) {
73
+ if (currentKey) {
74
+ frontmatter[currentKey] = currentValue.trim();
75
+ }
76
+ const [key, ...valueParts] = trimmed.split(':');
77
+ currentKey = key.trim();
78
+ currentValue = valueParts.join(':').trim();
79
+ } else if (currentKey) {
80
+ currentValue += '\n' + trimmed;
81
+ }
82
+ }
83
+
84
+ if (currentKey) {
85
+ frontmatter[currentKey] = currentValue.trim();
86
+ }
87
+
88
+ return { frontmatter, content: contentWithoutFrontmatter };
89
+ }
90
+
91
+ function cleanMdxToMarkdown(content) {
92
+ // Remove import statements
93
+ content = content.replace(/^import .+?;?\s*$/gm, '');
94
+
95
+ // Remove JSX component calls like <ComponentName />
96
+ content = content.replace(/<[A-Z][a-zA-Z0-9]*\s*\/>/g, '');
97
+
98
+ // Convert JSX components to simpler markdown
99
+ // Handle Sidenote components specially
100
+ content = content.replace(/<Sidenote>([\s\S]*?)<\/Sidenote>/g, (match, innerContent) => {
101
+ // Extract main content and aside content
102
+ const asideMatch = innerContent.match(/<Fragment slot="aside">([\s\S]*?)<\/Fragment>/);
103
+ const mainContent = innerContent.replace(/<Fragment slot="aside">[\s\S]*?<\/Fragment>/, '').trim();
104
+ const asideContent = asideMatch ? asideMatch[1].trim() : '';
105
+
106
+ let result = mainContent;
107
+ if (asideContent) {
108
+ result += `\n\n> **Note:** ${asideContent}`;
109
+ }
110
+ return result;
111
+ });
112
+
113
+ // Handle Note components
114
+ content = content.replace(/<Note[^>]*>([\s\S]*?)<\/Note>/g, (match, innerContent) => {
115
+ return `\n> **Note:** ${innerContent.trim()}\n`;
116
+ });
117
+
118
+ // Handle Wide and FullWidth components
119
+ content = content.replace(/<(Wide|FullWidth)>([\s\S]*?)<\/\1>/g, '$2');
120
+
121
+ // Handle HtmlEmbed components (convert to simple text)
122
+ content = content.replace(/<HtmlEmbed[^>]*\/>/g, '*[Interactive content not available in LaTeX]*');
123
+
124
+ // Remove remaining JSX fragments
125
+ content = content.replace(/<Fragment[^>]*>([\s\S]*?)<\/Fragment>/g, '$1');
126
+ content = content.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>([\s\S]*?)<\/[A-Z][a-zA-Z0-9]*>/g, '$1');
127
+
128
+ // Clean up className attributes
129
+ content = content.replace(/className="[^"]*"/g, '');
130
+
131
+ // Clean up extra whitespace
132
+ content = content.replace(/\n{3,}/g, '\n\n');
133
+
134
+ return content.trim();
135
+ }
136
+
137
+ async function processChapterImports(content, contentDir) {
138
+ let processedContent = content;
139
+
140
+ // First, extract all import statements and their corresponding component calls
141
+ const importPattern = /import\s+(\w+)\s+from\s+["']\.\/chapters\/([^"']+)["'];?/g;
142
+ const imports = new Map();
143
+ let match;
144
+
145
+ // Collect all imports
146
+ while ((match = importPattern.exec(content)) !== null) {
147
+ const [fullImport, componentName, chapterPath] = match;
148
+ imports.set(componentName, { path: chapterPath, importStatement: fullImport });
149
+ }
150
+
151
+ // Remove all import statements
152
+ processedContent = processedContent.replace(importPattern, '');
153
+
154
+ // Process each component call
155
+ for (const [componentName, { path: chapterPath }] of imports) {
156
+ const componentCallPattern = new RegExp(`<${componentName}\\s*\\/>`, 'g');
157
+
158
+ try {
159
+ const chapterFile = resolve(contentDir, 'chapters', chapterPath);
160
+ const chapterContent = await readMdxFile(chapterFile);
161
+ const { content: chapterMarkdown } = extractFrontmatter(chapterContent);
162
+ const cleanChapter = cleanMdxToMarkdown(chapterMarkdown);
163
+
164
+ processedContent = processedContent.replace(componentCallPattern, cleanChapter);
165
+ console.log(`✅ Processed chapter: ${chapterPath}`);
166
+ } catch (error) {
167
+ console.warn(`Warning: Could not process chapter ${chapterPath}:`, error.message);
168
+ processedContent = processedContent.replace(componentCallPattern, `\n*[Chapter ${chapterPath} could not be loaded]*\n`);
169
+ }
170
+ }
171
+
172
+ return processedContent;
173
+ }
174
+
175
+ function createLatexPreamble(frontmatter) {
176
+ const title = frontmatter.title ? frontmatter.title.replace(/\n/g, ' ') : 'Untitled Article';
177
+ const subtitle = frontmatter.subtitle || '';
178
+ const authors = frontmatter.authors || '';
179
+ const date = frontmatter.published || '';
180
+
181
+ return `\\documentclass[11pt,a4paper]{article}
182
+ \\usepackage[utf8]{inputenc}
183
+ \\usepackage[T1]{fontenc}
184
+ \\usepackage{amsmath,amsfonts,amssymb}
185
+ \\usepackage{graphicx}
186
+ \\usepackage{hyperref}
187
+ \\usepackage{booktabs}
188
+ \\usepackage{longtable}
189
+ \\usepackage{array}
190
+ \\usepackage{multirow}
191
+ \\usepackage{wrapfig}
192
+ \\usepackage{float}
193
+ \\usepackage{colortbl}
194
+ \\usepackage{pdflscape}
195
+ \\usepackage{tabu}
196
+ \\usepackage{threeparttable}
197
+ \\usepackage{threeparttablex}
198
+ \\usepackage{ulem}
199
+ \\usepackage{makecell}
200
+ \\usepackage{xcolor}
201
+ \\usepackage{listings}
202
+ \\usepackage{fancyvrb}
203
+ \\usepackage{geometry}
204
+ \\geometry{margin=1in}
205
+
206
+ \\title{${title}${subtitle ? `\\\\\\large ${subtitle}` : ''}}
207
+ ${authors ? `\\author{${authors}}` : ''}
208
+ ${date ? `\\date{${date}}` : ''}
209
+
210
+ \\begin{document}
211
+ \\maketitle
212
+ \\tableofcontents
213
+ \\newpage
214
+
215
+ `;
216
+ }
217
+
218
+ async function main() {
219
+ const cwd = process.cwd();
220
+ const args = parseArgs(process.argv);
221
+
222
+ // Check if pandoc is installed
223
+ const hasPandoc = await checkPandocInstalled();
224
+ if (!hasPandoc) {
225
+ console.error('❌ Pandoc is not installed. Please install it first:');
226
+ console.error(' macOS: brew install pandoc');
227
+ console.error(' Ubuntu: apt-get install pandoc');
228
+ console.error(' Windows: choco install pandoc');
229
+ process.exit(1);
230
+ }
231
+
232
+ const contentDir = resolve(cwd, 'src/content');
233
+ const articleFile = resolve(contentDir, 'article.mdx');
234
+
235
+ // Check if article.mdx exists
236
+ try {
237
+ await fs.access(articleFile);
238
+ } catch {
239
+ console.error(`❌ Could not find article.mdx at ${articleFile}`);
240
+ process.exit(1);
241
+ }
242
+
243
+ console.log('> Reading article content...');
244
+ const articleContent = await readMdxFile(articleFile);
245
+ const { frontmatter, content } = extractFrontmatter(articleContent);
246
+
247
+ console.log('> Processing chapters...');
248
+ const processedContent = await processChapterImports(content, contentDir);
249
+
250
+ console.log('> Converting MDX to Markdown...');
251
+ const markdownContent = cleanMdxToMarkdown(processedContent);
252
+
253
+ // Generate output filename
254
+ const title = frontmatter.title ? frontmatter.title.replace(/\n/g, ' ') : 'article';
255
+ const outFileBase = args.filename ? String(args.filename).replace(/\.(tex|pdf)$/i, '') : slugify(title);
256
+
257
+ // Create temporary markdown file
258
+ const tempMdFile = resolve(cwd, 'temp-article.md');
259
+ await fs.writeFile(tempMdFile, markdownContent);
260
+
261
+
262
+ console.log('> Converting to LaTeX with Pandoc...');
263
+ const outputLatex = resolve(cwd, 'dist', `${outFileBase}.tex`);
264
+
265
+ // Ensure dist directory exists
266
+ await fs.mkdir(resolve(cwd, 'dist'), { recursive: true });
267
+
268
+ // Pandoc conversion arguments
269
+ const pandocArgs = [
270
+ tempMdFile,
271
+ '-o', outputLatex,
272
+ '--from=markdown',
273
+ '--to=latex',
274
+ '--standalone',
275
+ '--toc',
276
+ '--number-sections',
277
+ '--highlight-style=tango',
278
+ '--listings'
279
+ ];
280
+
281
+ // Add bibliography if it exists
282
+ const bibFile = resolve(contentDir, 'bibliography.bib');
283
+ try {
284
+ await fs.access(bibFile);
285
+ pandocArgs.push('--bibliography', bibFile);
286
+ pandocArgs.push('--citeproc');
287
+ console.log('✅ Found bibliography file, including citations');
288
+ } catch {
289
+ console.log('ℹ️ No bibliography file found');
290
+ }
291
+
292
+ try {
293
+ await run('pandoc', pandocArgs);
294
+ console.log(`✅ LaTeX generated: ${outputLatex}`);
295
+
296
+ // Optionally compile to PDF if requested
297
+ if (args.pdf) {
298
+ console.log('> Compiling LaTeX to PDF...');
299
+ const outputPdf = resolve(cwd, 'dist', `${outFileBase}.pdf`);
300
+ await run('pdflatex', ['-output-directory', resolve(cwd, 'dist'), outputLatex]);
301
+ console.log(`✅ PDF generated: ${outputPdf}`);
302
+ }
303
+
304
+ } catch (error) {
305
+ console.error('❌ Pandoc conversion failed:', error.message);
306
+ process.exit(1);
307
+ } finally {
308
+ // Clean up temporary file
309
+ try {
310
+ await fs.unlink(tempMdFile);
311
+ } catch { }
312
+ }
313
+ }
314
+
315
+ main().catch((err) => {
316
+ console.error(err);
317
+ process.exit(1);
318
+ });
app/scripts/export-pdf.mjs ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+ import { spawn } from 'node:child_process';
3
+ import { setTimeout as delay } from 'node:timers/promises';
4
+ import { chromium } from 'playwright';
5
+ import { resolve } from 'node:path';
6
+ import { promises as fs } from 'node:fs';
7
+ import process from 'node:process';
8
+
9
+ async function run(command, args = [], options = {}) {
10
+ return new Promise((resolvePromise, reject) => {
11
+ const child = spawn(command, args, { stdio: 'inherit', shell: false, ...options });
12
+ child.on('error', reject);
13
+ child.on('exit', (code) => {
14
+ if (code === 0) resolvePromise(undefined);
15
+ else reject(new Error(`${command} ${args.join(' ')} exited with code ${code}`));
16
+ });
17
+ });
18
+ }
19
+
20
+ async function waitForServer(url, timeoutMs = 60000) {
21
+ const start = Date.now();
22
+ while (Date.now() - start < timeoutMs) {
23
+ try {
24
+ const res = await fetch(url);
25
+ if (res.ok) return;
26
+ } catch {}
27
+ await delay(500);
28
+ }
29
+ throw new Error(`Server did not start in time: ${url}`);
30
+ }
31
+
32
+ function parseArgs(argv) {
33
+ const out = {};
34
+ for (const arg of argv.slice(2)) {
35
+ if (!arg.startsWith('--')) continue;
36
+ const [k, v] = arg.replace(/^--/, '').split('=');
37
+ out[k] = v === undefined ? true : v;
38
+ }
39
+ return out;
40
+ }
41
+
42
+ function slugify(text) {
43
+ return String(text || '')
44
+ .normalize('NFKD')
45
+ .replace(/\p{Diacritic}+/gu, '')
46
+ .toLowerCase()
47
+ .replace(/[^a-z0-9]+/g, '-')
48
+ .replace(/^-+|-+$/g, '')
49
+ .slice(0, 120) || 'article';
50
+ }
51
+
52
+ function parseMargin(margin) {
53
+ if (!margin) return { top: '12mm', right: '12mm', bottom: '16mm', left: '12mm' };
54
+ const parts = String(margin).split(',').map(s => s.trim()).filter(Boolean);
55
+ if (parts.length === 1) {
56
+ return { top: parts[0], right: parts[0], bottom: parts[0], left: parts[0] };
57
+ }
58
+ if (parts.length === 2) {
59
+ return { top: parts[0], right: parts[1], bottom: parts[0], left: parts[1] };
60
+ }
61
+ if (parts.length === 3) {
62
+ return { top: parts[0], right: parts[1], bottom: parts[2], left: parts[1] };
63
+ }
64
+ return { top: parts[0] || '12mm', right: parts[1] || '12mm', bottom: parts[2] || '16mm', left: parts[3] || '12mm' };
65
+ }
66
+
67
+ function cssLengthToMm(val) {
68
+ if (!val) return 0;
69
+ const s = String(val).trim();
70
+ if (/mm$/i.test(s)) return parseFloat(s);
71
+ if (/cm$/i.test(s)) return parseFloat(s) * 10;
72
+ if (/in$/i.test(s)) return parseFloat(s) * 25.4;
73
+ if (/px$/i.test(s)) return (parseFloat(s) / 96) * 25.4; // 96 CSS px per inch
74
+ const num = parseFloat(s);
75
+ return Number.isFinite(num) ? num : 0; // assume mm if unitless
76
+ }
77
+
78
+ function getFormatSizeMm(format) {
79
+ const f = String(format || 'A4').toLowerCase();
80
+ switch (f) {
81
+ case 'letter': return { w: 215.9, h: 279.4 };
82
+ case 'legal': return { w: 215.9, h: 355.6 };
83
+ case 'a3': return { w: 297, h: 420 };
84
+ case 'tabloid': return { w: 279.4, h: 431.8 };
85
+ case 'a4':
86
+ default: return { w: 210, h: 297 };
87
+ }
88
+ }
89
+
90
+ async function waitForImages(page, timeoutMs = 15000) {
91
+ await page.evaluate(async (timeout) => {
92
+ const deadline = Date.now() + timeout;
93
+ const imgs = Array.from(document.images || []);
94
+ const unloaded = imgs.filter(img => !img.complete || (img.naturalWidth === 0));
95
+ await Promise.race([
96
+ Promise.all(unloaded.map(img => new Promise(res => {
97
+ if (img.complete && img.naturalWidth !== 0) return res(undefined);
98
+ img.addEventListener('load', () => res(undefined), { once: true });
99
+ img.addEventListener('error', () => res(undefined), { once: true });
100
+ }))),
101
+ new Promise(res => setTimeout(res, Math.max(0, deadline - Date.now())))
102
+ ]);
103
+ }, timeoutMs);
104
+ }
105
+
106
+ async function waitForPlotly(page, timeoutMs = 20000) {
107
+ await page.evaluate(async (timeout) => {
108
+ const start = Date.now();
109
+ const hasPlots = () => Array.from(document.querySelectorAll('.js-plotly-plot')).length > 0;
110
+ // Wait until plots exist or timeout
111
+ while (!hasPlots() && (Date.now() - start) < timeout) {
112
+ await new Promise(r => setTimeout(r, 200));
113
+ }
114
+ const deadline = start + timeout;
115
+ // Then wait until each plot contains the main svg
116
+ const allReady = () => Array.from(document.querySelectorAll('.js-plotly-plot')).every(el => el.querySelector('svg.main-svg'));
117
+ while (!allReady() && Date.now() < deadline) {
118
+ await new Promise(r => setTimeout(r, 200));
119
+ }
120
+ }, timeoutMs);
121
+ }
122
+
123
+ async function waitForD3(page, timeoutMs = 20000) {
124
+ await page.evaluate(async (timeout) => {
125
+ const start = Date.now();
126
+ const isReady = () => {
127
+ // Prioritize hero banner if present (generic container)
128
+ const hero = document.querySelector('.hero-banner');
129
+ if (hero) {
130
+ return !!hero.querySelector('svg circle, svg path, svg rect, svg g');
131
+ }
132
+ // Else require all D3 containers on page to have shapes
133
+ const containers = [
134
+ ...Array.from(document.querySelectorAll('.d3-line')),
135
+ ...Array.from(document.querySelectorAll('.d3-bar'))
136
+ ];
137
+ if (!containers.length) return true;
138
+ return containers.every(c => c.querySelector('svg circle, svg path, svg rect, svg g'));
139
+ };
140
+ while (!isReady() && (Date.now() - start) < timeout) {
141
+ await new Promise(r => setTimeout(r, 200));
142
+ }
143
+ }, timeoutMs);
144
+ }
145
+
146
+ async function waitForStableLayout(page, timeoutMs = 5000) {
147
+ const start = Date.now();
148
+ let last = await page.evaluate(() => document.scrollingElement ? document.scrollingElement.scrollHeight : document.body.scrollHeight);
149
+ let stableCount = 0;
150
+ while ((Date.now() - start) < timeoutMs && stableCount < 3) {
151
+ await page.waitForTimeout(250);
152
+ const now = await page.evaluate(() => document.scrollingElement ? document.scrollingElement.scrollHeight : document.body.scrollHeight);
153
+ if (now === last) stableCount += 1; else { stableCount = 0; last = now; }
154
+ }
155
+ }
156
+
157
+ async function main() {
158
+ const cwd = process.cwd();
159
+ const port = Number(process.env.PREVIEW_PORT || 8080);
160
+ const baseUrl = `http://127.0.0.1:${port}/`;
161
+ const args = parseArgs(process.argv);
162
+ // Default: light (do not rely on env vars implicitly)
163
+ const theme = (args.theme === 'dark' || args.theme === 'light') ? args.theme : 'light';
164
+ const format = args.format || 'A4';
165
+ const margin = parseMargin(args.margin);
166
+ const wait = (args.wait || 'full'); // 'networkidle' | 'images' | 'plotly' | 'full'
167
+
168
+ // filename can be provided, else computed from DOM (button) or page title later
169
+ let outFileBase = (args.filename && String(args.filename).replace(/\.pdf$/i, '')) || 'article';
170
+
171
+ // Build only if dist/ does not exist
172
+ const distDir = resolve(cwd, 'dist');
173
+ let hasDist = false;
174
+ try {
175
+ const st = await fs.stat(distDir);
176
+ hasDist = st && st.isDirectory();
177
+ } catch {}
178
+ if (!hasDist) {
179
+ console.log('> Building Astro site…');
180
+ await run('npm', ['run', 'build']);
181
+ } else {
182
+ console.log('> Skipping build (dist/ exists)…');
183
+ }
184
+
185
+ console.log('> Starting Astro preview…');
186
+ // Start preview in its own process group so we can terminate all children reliably
187
+ const preview = spawn('npm', ['run', 'preview'], { cwd, stdio: 'inherit', detached: true });
188
+ const previewExit = new Promise((resolvePreview) => {
189
+ preview.on('close', (code, signal) => resolvePreview({ code, signal }));
190
+ });
191
+
192
+ try {
193
+ await waitForServer(baseUrl, 60000);
194
+ console.log('> Server ready, generating PDF…');
195
+
196
+ const browser = await chromium.launch({ headless: true });
197
+ try {
198
+ const context = await browser.newContext();
199
+ await context.addInitScript((desired) => {
200
+ try {
201
+ localStorage.setItem('theme', desired);
202
+ // Apply theme immediately to avoid flashes
203
+ if (document && document.documentElement) {
204
+ document.documentElement.dataset.theme = desired;
205
+ }
206
+ } catch {}
207
+ }, theme);
208
+ const page = await context.newPage();
209
+ // Pre-fit viewport width to printable width so charts size correctly
210
+ const fmt = getFormatSizeMm(format);
211
+ const mw = fmt.w - cssLengthToMm(margin.left) - cssLengthToMm(margin.right);
212
+ const printableWidthPx = Math.max(320, Math.round((mw / 25.4) * 96));
213
+ await page.setViewportSize({ width: printableWidthPx, height: 1200 });
214
+ await page.goto(baseUrl, { waitUntil: 'load', timeout: 60000 });
215
+ // Give time for CDN scripts (Plotly/D3) to attach and for our fragment hooks to run
216
+ try { await page.waitForFunction(() => !!window.Plotly, { timeout: 8000 }); } catch {}
217
+ try { await page.waitForFunction(() => !!window.d3, { timeout: 8000 }); } catch {}
218
+ // Prefer explicit filename from the download button if present
219
+ if (!args.filename) {
220
+ const fromBtn = await page.evaluate(() => {
221
+ const btn = document.getElementById('download-pdf-btn');
222
+ const f = btn ? btn.getAttribute('data-pdf-filename') : null;
223
+ return f || '';
224
+ });
225
+ if (fromBtn) {
226
+ outFileBase = String(fromBtn).replace(/\.pdf$/i, '');
227
+ } else {
228
+ // Fallback: compute slug from hero title or document.title
229
+ const title = await page.evaluate(() => {
230
+ const h1 = document.querySelector('h1.hero-title');
231
+ const t = h1 ? h1.textContent : document.title;
232
+ return (t || '').replace(/\s+/g, ' ').trim();
233
+ });
234
+ outFileBase = slugify(title);
235
+ }
236
+ }
237
+
238
+ // Wait for render readiness
239
+ if (wait === 'images' || wait === 'full') {
240
+ await waitForImages(page);
241
+ }
242
+ if (wait === 'd3' || wait === 'full') {
243
+ await waitForD3(page);
244
+ }
245
+ if (wait === 'plotly' || wait === 'full') {
246
+ await waitForPlotly(page);
247
+ }
248
+ if (wait === 'full') {
249
+ await waitForStableLayout(page);
250
+ }
251
+ await page.emulateMedia({ media: 'print' });
252
+
253
+ // Enforce responsive sizing for SVG/iframes by removing hard attrs and injecting CSS (top-level and inside same-origin iframes)
254
+ try {
255
+ await page.evaluate(() => {
256
+ function isSmallSvg(svg){
257
+ try {
258
+ const vb = svg && svg.viewBox && svg.viewBox.baseVal ? svg.viewBox.baseVal : null;
259
+ if (vb && vb.width && vb.height && vb.width <= 50 && vb.height <= 50) return true;
260
+ const r = svg.getBoundingClientRect && svg.getBoundingClientRect();
261
+ if (r && r.width && r.height && r.width <= 50 && r.height <= 50) return true;
262
+ } catch {}
263
+ return false;
264
+ }
265
+ function lockSmallSvgSize(svg){
266
+ try {
267
+ const r = svg.getBoundingClientRect ? svg.getBoundingClientRect() : null;
268
+ const w = (r && r.width) ? Math.round(r.width) : null;
269
+ const h = (r && r.height) ? Math.round(r.height) : null;
270
+ if (w) svg.style.setProperty('width', w + 'px', 'important');
271
+ if (h) svg.style.setProperty('height', h + 'px', 'important');
272
+ svg.style.setProperty('max-width', 'none', 'important');
273
+ } catch {}
274
+ }
275
+ function fixSvg(svg){
276
+ if (!svg) return;
277
+ // Do not alter hero banner SVG sizing; it may rely on explicit width/height
278
+ try { if (svg.closest && svg.closest('.hero-banner')) return; } catch {}
279
+ if (isSmallSvg(svg)) { lockSmallSvgSize(svg); return; }
280
+ try { svg.removeAttribute('width'); } catch {}
281
+ try { svg.removeAttribute('height'); } catch {}
282
+ svg.style.maxWidth = '100%';
283
+ svg.style.width = '100%';
284
+ svg.style.height = 'auto';
285
+ if (!svg.getAttribute('preserveAspectRatio')) svg.setAttribute('preserveAspectRatio','xMidYMid meet');
286
+ }
287
+ document.querySelectorAll('svg').forEach(fixSvg);
288
+ document.querySelectorAll('.mermaid, .mermaid svg').forEach((el)=>{
289
+ if (el.tagName && el.tagName.toLowerCase() === 'svg') fixSvg(el);
290
+ else { el.style.display='block'; el.style.width='100%'; el.style.maxWidth='100%'; }
291
+ });
292
+ document.querySelectorAll('iframe, embed, object').forEach((el) => {
293
+ el.style.width = '100%';
294
+ el.style.maxWidth = '100%';
295
+ try { el.removeAttribute('width'); } catch {}
296
+ // Best-effort inject into same-origin frames
297
+ try {
298
+ const doc = (el.tagName.toLowerCase()==='object' ? el.contentDocument : el.contentDocument);
299
+ if (doc && doc.head) {
300
+ const s = doc.createElement('style');
301
+ s.textContent = 'html,body{overflow-x:hidden;} svg,canvas,img,video{max-width:100%!important;height:auto!important;} svg[width]{width:100%!important}';
302
+ doc.head.appendChild(s);
303
+ doc.querySelectorAll('svg').forEach((svg)=>{ if (isSmallSvg(svg)) lockSmallSvgSize(svg); else fixSvg(svg); });
304
+ }
305
+ } catch (_) { /* cross-origin; ignore */ }
306
+ });
307
+ });
308
+ } catch {}
309
+
310
+ // Generate OG thumbnail (1200x630)
311
+ try {
312
+ const ogW = 1200, ogH = 630;
313
+ await page.setViewportSize({ width: ogW, height: ogH });
314
+ // Give layout a tick to adjust
315
+ await page.waitForTimeout(200);
316
+ // Ensure layout & D3 re-rendered after viewport change
317
+ await page.evaluate(() => { window.scrollTo(0, 0); window.dispatchEvent(new Event('resize')); });
318
+ try { await waitForD3(page, 8000); } catch {}
319
+
320
+ // Temporarily improve visibility for light theme thumbnails
321
+ // - Force normal blend for points
322
+ // - Ensure an SVG background (CSS background on svg element)
323
+ const cssHandle = await page.addStyleTag({ content: `
324
+ .hero .points { mix-blend-mode: normal !important; }
325
+ ` });
326
+ const thumbPath = resolve(cwd, 'dist', 'thumb.auto.jpg');
327
+ await page.screenshot({ path: thumbPath, type: 'jpeg', quality: 85, fullPage: false });
328
+ // Also emit PNG for compatibility if needed
329
+ const thumbPngPath = resolve(cwd, 'dist', 'thumb.auto.png');
330
+ await page.screenshot({ path: thumbPngPath, type: 'png', fullPage: false });
331
+ const publicThumb = resolve(cwd, 'public', 'thumb.auto.jpg');
332
+ const publicThumbPng = resolve(cwd, 'public', 'thumb.auto.png');
333
+ try { await fs.copyFile(thumbPath, publicThumb); } catch {}
334
+ try { await fs.copyFile(thumbPngPath, publicThumbPng); } catch {}
335
+ // Remove temporary style so PDF is unaffected
336
+ try { await cssHandle.evaluate((el) => el.remove()); } catch {}
337
+ console.log(`✅ OG thumbnail generated: ${thumbPath}`);
338
+ } catch (e) {
339
+ console.warn('Unable to generate OG thumbnail:', e?.message || e);
340
+ }
341
+ const outPath = resolve(cwd, 'dist', `${outFileBase}.pdf`);
342
+ // Restore viewport to printable width before PDF (thumbnail changed it)
343
+ try {
344
+ const fmt2 = getFormatSizeMm(format);
345
+ const mw2 = fmt2.w - cssLengthToMm(margin.left) - cssLengthToMm(margin.right);
346
+ const printableWidthPx2 = Math.max(320, Math.round((mw2 / 25.4) * 96));
347
+ await page.setViewportSize({ width: printableWidthPx2, height: 1400 });
348
+ await page.evaluate(() => { window.scrollTo(0, 0); window.dispatchEvent(new Event('resize')); });
349
+ try { await waitForD3(page, 8000); } catch {}
350
+ await waitForStableLayout(page);
351
+ // Re-apply responsive fixes after viewport change
352
+ try {
353
+ await page.evaluate(() => {
354
+ function isSmallSvg(svg){
355
+ try {
356
+ const vb = svg && svg.viewBox && svg.viewBox.baseVal ? svg.viewBox.baseVal : null;
357
+ if (vb && vb.width && vb.height && vb.width <= 50 && vb.height <= 50) return true;
358
+ const r = svg.getBoundingClientRect && svg.getBoundingClientRect();
359
+ if (r && r.width && r.height && r.width <= 50 && r.height <= 50) return true;
360
+ } catch {}
361
+ return false;
362
+ }
363
+ function lockSmallSvgSize(svg){
364
+ try {
365
+ const r = svg.getBoundingClientRect ? svg.getBoundingClientRect() : null;
366
+ const w = (r && r.width) ? Math.round(r.width) : null;
367
+ const h = (r && r.height) ? Math.round(r.height) : null;
368
+ if (w) svg.style.setProperty('width', w + 'px', 'important');
369
+ if (h) svg.style.setProperty('height', h + 'px', 'important');
370
+ svg.style.setProperty('max-width', 'none', 'important');
371
+ } catch {}
372
+ }
373
+ function fixSvg(svg){
374
+ if (!svg) return;
375
+ // Do not alter hero banner SVG sizing; it may rely on explicit width/height
376
+ try { if (svg.closest && svg.closest('.hero-banner')) return; } catch {}
377
+ if (isSmallSvg(svg)) { lockSmallSvgSize(svg); return; }
378
+ try { svg.removeAttribute('width'); } catch {}
379
+ try { svg.removeAttribute('height'); } catch {}
380
+ svg.style.maxWidth = '100%';
381
+ svg.style.width = '100%';
382
+ svg.style.height = 'auto';
383
+ if (!svg.getAttribute('preserveAspectRatio')) svg.setAttribute('preserveAspectRatio','xMidYMid meet');
384
+ }
385
+ document.querySelectorAll('svg').forEach((svg)=>{ if (isSmallSvg(svg)) lockSmallSvgSize(svg); else fixSvg(svg); });
386
+ document.querySelectorAll('.mermaid, .mermaid svg').forEach((el)=>{
387
+ if (el.tagName && el.tagName.toLowerCase() === 'svg') fixSvg(el);
388
+ else { el.style.display='block'; el.style.width='100%'; el.style.maxWidth='100%'; }
389
+ });
390
+ document.querySelectorAll('iframe, embed, object').forEach((el) => {
391
+ el.style.width = '100%';
392
+ el.style.maxWidth = '100%';
393
+ try { el.removeAttribute('width'); } catch {}
394
+ try {
395
+ const doc = (el.tagName.toLowerCase()==='object' ? el.contentDocument : el.contentDocument);
396
+ if (doc && doc.head) {
397
+ const s = doc.createElement('style');
398
+ s.textContent = 'html,body{overflow-x:hidden;} svg,canvas,img,video{max-width:100%!important;height:auto!important;} svg[width]{width:100%!important}';
399
+ doc.head.appendChild(s);
400
+ doc.querySelectorAll('svg').forEach((svg)=>{ if (isSmallSvg(svg)) lockSmallSvgSize(svg); else fixSvg(svg); });
401
+ }
402
+ } catch (_) {}
403
+ });
404
+ });
405
+ } catch {}
406
+ } catch {}
407
+ // Temporarily enforce print-safe responsive sizing (SVG/iframes) and improve banner visibility
408
+ let pdfCssHandle = null;
409
+ try {
410
+ pdfCssHandle = await page.addStyleTag({ content: `
411
+ /* General container safety */
412
+ html, body { overflow-x: hidden !important; }
413
+
414
+ /* Make all vector/bitmap media responsive for print */
415
+ svg, canvas, img, video { max-width: 100% !important; height: auto !important; }
416
+ /* Mermaid diagrams */
417
+ .mermaid, .mermaid svg { display: block; width: 100% !important; max-width: 100% !important; height: auto !important; }
418
+ /* Any explicit width attributes */
419
+ svg[width] { width: 100% !important; }
420
+ /* Iframes and similar embeds */
421
+ iframe, embed, object { width: 100% !important; max-width: 100% !important; height: auto; }
422
+
423
+ /* HtmlEmbed wrappers (defensive) */
424
+ .html-embed, .html-embed__card { max-width: 100% !important; width: 100% !important; }
425
+ .html-embed__card > div[id^="frag-"] { width: 100% !important; max-width: 100% !important; }
426
+
427
+ /* Banner centering & visibility */
428
+ .hero .points { mix-blend-mode: normal !important; }
429
+ /* Do NOT force a fixed height to avoid clipping in PDF */
430
+ .hero-banner { width: 100% !important; max-width: 980px !important; margin-left: auto !important; margin-right: auto !important; }
431
+ .hero-banner svg { width: 100% !important; height: auto !important; }
432
+ ` });
433
+ } catch {}
434
+ await page.pdf({
435
+ path: outPath,
436
+ format,
437
+ printBackground: true,
438
+ margin
439
+ });
440
+ try { if (pdfCssHandle) await pdfCssHandle.evaluate((el) => el.remove()); } catch {}
441
+ console.log(`✅ PDF generated: ${outPath}`);
442
+
443
+ // Copy into public only under the slugified name
444
+ const publicSlugPath = resolve(cwd, 'public', `${outFileBase}.pdf`);
445
+ try {
446
+ await fs.mkdir(resolve(cwd, 'public'), { recursive: true });
447
+ await fs.copyFile(outPath, publicSlugPath);
448
+ console.log(`✅ PDF copied to: ${publicSlugPath}`);
449
+ } catch (e) {
450
+ console.warn('Unable to copy PDF to public/:', e?.message || e);
451
+ }
452
+ } finally {
453
+ await browser.close();
454
+ }
455
+ } finally {
456
+ // Try a clean shutdown of preview (entire process group first)
457
+ try {
458
+ if (process.platform !== 'win32') {
459
+ try { process.kill(-preview.pid, 'SIGINT'); } catch {}
460
+ }
461
+ try { preview.kill('SIGINT'); } catch {}
462
+ await Promise.race([previewExit, delay(3000)]);
463
+ // Force kill if still alive
464
+ // eslint-disable-next-line no-unsafe-optional-chaining
465
+ if (!preview.killed) {
466
+ try {
467
+ if (process.platform !== 'win32') {
468
+ try { process.kill(-preview.pid, 'SIGKILL'); } catch {}
469
+ }
470
+ try { preview.kill('SIGKILL'); } catch {}
471
+ } catch {}
472
+ await Promise.race([previewExit, delay(1000)]);
473
+ }
474
+ } catch {}
475
+ }
476
+ }
477
+
478
+ main().catch((err) => {
479
+ console.error(err);
480
+ process.exit(1);
481
+ });
482
+
483
+
app/scripts/generate-trackio-data.mjs ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ // Generate synthetic Trackio-like CSV data with realistic ML curves.
4
+ // - Steps are simple integers (e.g., 1..N)
5
+ // - Metrics: epoch, train_accuracy, val_accuracy, train_loss, val_loss
6
+ // - W&B-like run names (e.g., pleasant-flower-1)
7
+ // - Deterministic with --seed
8
+ //
9
+ // Usage:
10
+ // node app/scripts/generate-trackio-data.mjs \
11
+ // --runs 3 \
12
+ // --steps 10 \
13
+ // --out app/src/content/assets/data/trackio_wandb_synth.csv \
14
+ // [--seed 42] [--epoch-max 3.0] [--amount 1.0] [--start 1]
15
+ //
16
+ // To overwrite the demo file used by the embed:
17
+ // node app/scripts/generate-trackio-data.mjs --runs 3 --steps 10 --out app/src/content/assets/data/trackio_wandb_demo.csv --seed 1337
18
+
19
+ import fs from 'node:fs/promises';
20
+ import path from 'node:path';
21
+
22
+ function parseArgs(argv){
23
+ const args = { runs: 3, steps: 10, out: '', seed: undefined, epochMax: 3.0, amount: 1, start: 1 };
24
+ for (let i = 2; i < argv.length; i++){
25
+ const a = argv[i];
26
+ if (a === '--runs' && argv[i+1]) { args.runs = Math.max(1, parseInt(argv[++i], 10) || 3); continue; }
27
+ if (a === '--steps' && argv[i+1]) { args.steps = Math.max(2, parseInt(argv[++i], 10) || 10); continue; }
28
+ if (a === '--out' && argv[i+1]) { args.out = argv[++i]; continue; }
29
+ if (a === '--seed' && argv[i+1]) { args.seed = Number(argv[++i]); continue; }
30
+ if (a === '--epoch-max' && argv[i+1]) { args.epochMax = Number(argv[++i]) || 3.0; continue; }
31
+ if (a === '--amount' && argv[i+1]) { args.amount = Number(argv[++i]) || 1.0; continue; }
32
+ if (a === '--start' && argv[i+1]) { args.start = parseInt(argv[++i], 10) || 1; continue; }
33
+ }
34
+ if (!args.out) {
35
+ args.out = path.join('app', 'src', 'content', 'assets', 'data', 'trackio_wandb_synth.csv');
36
+ }
37
+ return args;
38
+ }
39
+
40
+ function mulberry32(seed){
41
+ let t = seed >>> 0;
42
+ return function(){
43
+ t += 0x6D2B79F5;
44
+ let r = Math.imul(t ^ (t >>> 15), 1 | t);
45
+ r ^= r + Math.imul(r ^ (r >>> 7), 61 | r);
46
+ return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
47
+ };
48
+ }
49
+
50
+ function makeRng(seed){
51
+ if (Number.isFinite(seed)) return mulberry32(seed);
52
+ return Math.random;
53
+ }
54
+
55
+ function randn(rng){
56
+ // Box-Muller transform
57
+ let u = 0, v = 0;
58
+ while (u === 0) u = rng();
59
+ while (v === 0) v = rng();
60
+ return Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
61
+ }
62
+
63
+ function clamp(x, lo, hi){
64
+ return Math.max(lo, Math.min(hi, x));
65
+ }
66
+
67
+ function logistic(t, k=6, x0=0.5){
68
+ // 1 / (1 + e^{-k (t - x0)}) in [0,1]
69
+ return 1 / (1 + Math.exp(-k * (t - x0)));
70
+ }
71
+
72
+ function expDecay(t, k=3){
73
+ // (1 - e^{-k t}) in [0,1]
74
+ return 1 - Math.exp(-k * t);
75
+ }
76
+
77
+ function pick(array, rng){
78
+ return array[Math.floor(rng() * array.length) % array.length];
79
+ }
80
+
81
+ function buildRunNames(count, rng){
82
+ const adjectives = [
83
+ 'pleasant','brisk','silent','ancient','bold','gentle','rapid','shy','curious','lively',
84
+ 'fearless','soothing','glossy','hidden','misty','bright','calm','keen','noble','swift'
85
+ ];
86
+ const nouns = [
87
+ 'flower','glade','sky','river','forest','ember','comet','meadow','harbor','dawn',
88
+ 'mountain','prairie','breeze','valley','lagoon','desert','monsoon','reef','thunder','willow'
89
+ ];
90
+ const names = new Set();
91
+ let attempts = 0;
92
+ while (names.size < count && attempts < count * 20){
93
+ attempts++;
94
+ const left = pick(adjectives, rng);
95
+ const right = pick(nouns, rng);
96
+ const idx = 1 + Math.floor(rng() * 9);
97
+ names.add(`${left}-${right}-${idx}`);
98
+ }
99
+ return Array.from(names);
100
+ }
101
+
102
+ function formatLike(value, decimals){
103
+ return Number.isFinite(decimals) && decimals >= 0 ? value.toFixed(decimals) : String(value);
104
+ }
105
+
106
+ async function main(){
107
+ const args = parseArgs(process.argv);
108
+ const rng = makeRng(args.seed);
109
+
110
+ // Steps: integers from start .. start+steps-1
111
+ const steps = Array.from({ length: args.steps }, (_, i) => args.start + i);
112
+ const stepNorm = (i) => (i - steps[0]) / (steps[steps.length-1] - steps[0]);
113
+
114
+ const runs = buildRunNames(args.runs, rng);
115
+
116
+ // Per-run slight variations
117
+ const runParams = runs.map((_r, idx) => {
118
+ const r = rng();
119
+ // Final accuracies
120
+ const trainAccFinal = clamp(0.86 + (r - 0.5) * 0.12 * args.amount, 0.78, 0.97);
121
+ const valAccFinal = clamp(trainAccFinal - (0.02 + rng() * 0.05), 0.70, 0.95);
122
+ // Loss plateau
123
+ const lossStart = 7.0 + (rng() - 0.5) * 0.10 * args.amount; // ~7.0 ±0.05
124
+ const lossPlateau = 6.78 + (rng() - 0.5) * 0.04 * args.amount; // ~6.78 ±0.02
125
+ const lossK = 2.0 + rng() * 1.5; // decay speed
126
+ // Acc growth steepness and midpoint
127
+ const kAcc = 4.5 + rng() * 3.0;
128
+ const x0Acc = 0.35 + rng() * 0.25;
129
+ return { trainAccFinal, valAccFinal, lossStart, lossPlateau, lossK, kAcc, x0Acc };
130
+ });
131
+
132
+ const lines = [];
133
+ lines.push('run,step,metric,value,stderr');
134
+
135
+ // EPOCH: linear 0..epochMax across steps
136
+ for (let r = 0; r < runs.length; r++){
137
+ const run = runs[r];
138
+ for (let i = 0; i < steps.length; i++){
139
+ const t = stepNorm(steps[i]);
140
+ const epoch = args.epochMax * t;
141
+ lines.push(`${run},${steps[i]},epoch,${formatLike(epoch, 2)},`);
142
+ }
143
+ }
144
+
145
+ // TRAIN LOSS & VAL LOSS
146
+ for (let r = 0; r < runs.length; r++){
147
+ const run = runs[r];
148
+ const p = runParams[r];
149
+ let prevTrain = null;
150
+ let prevVal = null;
151
+ for (let i = 0; i < steps.length; i++){
152
+ const t = stepNorm(steps[i]);
153
+ const d = expDecay(t, p.lossK); // 0..1
154
+ let trainLoss = p.lossStart - (p.lossStart - p.lossPlateau) * d;
155
+ let valLoss = trainLoss + 0.02 + (rng() * 0.03);
156
+ // Add mild noise
157
+ trainLoss += randn(rng) * 0.01 * args.amount;
158
+ valLoss += randn(rng) * 0.012 * args.amount;
159
+ // Keep reasonable and mostly monotonic (small upward blips allowed)
160
+ if (prevTrain != null) trainLoss = Math.min(prevTrain + 0.01, trainLoss);
161
+ if (prevVal != null) valLoss = Math.min(prevVal + 0.012, valLoss);
162
+ prevTrain = trainLoss; prevVal = valLoss;
163
+ const stderrTrain = clamp(0.03 - 0.02 * t + Math.abs(randn(rng)) * 0.003, 0.006, 0.04);
164
+ const stderrVal = clamp(0.035 - 0.022 * t + Math.abs(randn(rng)) * 0.003, 0.008, 0.045);
165
+ lines.push(`${run},${steps[i]},train_loss,${formatLike(trainLoss, 3)},${formatLike(stderrTrain, 3)}`);
166
+ lines.push(`${run},${steps[i]},val_loss,${formatLike(valLoss, 3)},${formatLike(stderrVal, 3)}`);
167
+ }
168
+ }
169
+
170
+ // TRAIN ACCURACY & VAL ACCURACY (logistic)
171
+ for (let r = 0; r < runs.length; r++){
172
+ const run = runs[r];
173
+ const p = runParams[r];
174
+ for (let i = 0; i < steps.length; i++){
175
+ const t = stepNorm(steps[i]);
176
+ const accBase = logistic(t, p.kAcc, p.x0Acc);
177
+ let trainAcc = clamp(0.55 + accBase * (p.trainAccFinal - 0.55), 0, 1);
178
+ let valAcc = clamp(0.52 + accBase * (p.valAccFinal - 0.52), 0, 1);
179
+ // Gentle noise
180
+ trainAcc = clamp(trainAcc + randn(rng) * 0.005 * args.amount, 0, 1);
181
+ valAcc = clamp(valAcc + randn(rng) * 0.006 * args.amount, 0, 1);
182
+ const stderrTrain = clamp(0.02 - 0.011 * t + Math.abs(randn(rng)) * 0.002, 0.006, 0.03);
183
+ const stderrVal = clamp(0.022 - 0.012 * t + Math.abs(randn(rng)) * 0.002, 0.007, 0.032);
184
+ lines.push(`${run},${steps[i]},train_accuracy,${formatLike(trainAcc, 4)},${formatLike(stderrTrain, 3)}`);
185
+ lines.push(`${run},${steps[i]},val_accuracy,${formatLike(valAcc, 4)},${formatLike(stderrVal, 3)}`);
186
+ }
187
+ }
188
+
189
+ // Ensure directory exists
190
+ await fs.mkdir(path.dirname(args.out), { recursive: true });
191
+ await fs.writeFile(args.out, lines.join('\n') + '\n', 'utf8');
192
+ const relOut = path.relative(process.cwd(), args.out);
193
+ console.log(`Synthetic CSV generated: ${relOut}`);
194
+ }
195
+
196
+ main().catch(err => { console.error(err?.stack || String(err)); process.exit(1); });
app/scripts/jitter-trackio-data.mjs ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ // Jitter Trackio CSV data with small, controlled noise.
4
+ // - Preserves comments (# ...) and blank lines
5
+ // - Leaves 'epoch' values unchanged
6
+ // - Adds mild noise to train/val accuracy (clamped to [0,1])
7
+ // - Adds mild noise to train/val loss (kept >= 0)
8
+ // - Keeps steps untouched
9
+ // Usage:
10
+ // node app/scripts/jitter-trackio-data.mjs \
11
+ // --in app/src/content/assets/data/trackio_wandb_demo.csv \
12
+ // --out app/src/content/assets/data/trackio_wandb_demo.jitter.csv \
13
+ // [--seed 42] [--amount 1.0] [--in-place]
14
+
15
+ import fs from 'node:fs/promises';
16
+ import path from 'node:path';
17
+
18
+ function parseArgs(argv){
19
+ const args = { in: '', out: '', seed: undefined, amount: 1, inPlace: false };
20
+ for (let i = 2; i < argv.length; i++){
21
+ const a = argv[i];
22
+ if (a === '--in' && argv[i+1]) { args.in = argv[++i]; continue; }
23
+ if (a === '--out' && argv[i+1]) { args.out = argv[++i]; continue; }
24
+ if (a === '--seed' && argv[i+1]) { args.seed = Number(argv[++i]); continue; }
25
+ if (a === '--amount' && argv[i+1]) { args.amount = Number(argv[++i]) || 3; continue; }
26
+ if (a === '--in-place') { args.inPlace = true; continue; }
27
+ }
28
+ if (!args.in) throw new Error('--in is required');
29
+ if (args.inPlace) args.out = args.in;
30
+ if (!args.out) {
31
+ const { dir, name, ext } = path.parse(args.in);
32
+ args.out = path.join(dir, `${name}.jitter${ext || '.csv'}`);
33
+ }
34
+ return args;
35
+ }
36
+
37
+ function mulberry32(seed){
38
+ let t = seed >>> 0;
39
+ return function(){
40
+ t += 0x6D2B79F5;
41
+ let r = Math.imul(t ^ (t >>> 15), 1 | t);
42
+ r ^= r + Math.imul(r ^ (r >>> 7), 61 | r);
43
+ return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
44
+ };
45
+ }
46
+
47
+ function makeRng(seed){
48
+ if (Number.isFinite(seed)) return mulberry32(seed);
49
+ return Math.random;
50
+ }
51
+
52
+ function randn(rng){
53
+ // Box-Muller transform
54
+ let u = 0, v = 0;
55
+ while (u === 0) u = rng();
56
+ while (v === 0) v = rng();
57
+ return Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
58
+ }
59
+
60
+ function jitterValue(metric, value, amount, rng){
61
+ const m = metric.toLowerCase();
62
+ if (m === 'epoch') return value; // keep as-is
63
+ if (m.includes('accuracy')){
64
+ const n = Math.max(-0.02 * amount, Math.min(0.02 * amount, randn(rng) * 0.01 * amount));
65
+ return Math.max(0, Math.min(1, value + n));
66
+ }
67
+ if (m.includes('loss')){
68
+ const n = Math.max(-0.03 * amount, Math.min(0.03 * amount, randn(rng) * 0.01 * amount));
69
+ return Math.max(0, value + n);
70
+ }
71
+ // default: tiny noise
72
+ const n = Math.max(-0.01 * amount, Math.min(0.01 * amount, randn(rng) * 0.005 * amount));
73
+ return value + n;
74
+ }
75
+
76
+ function formatNumberLike(original, value){
77
+ const s = String(original);
78
+ const dot = s.indexOf('.')
79
+ const decimals = dot >= 0 ? (s.length - dot - 1) : 0;
80
+ if (!Number.isFinite(value)) return s;
81
+ if (decimals <= 0) return String(Math.round(value));
82
+ return value.toFixed(decimals);
83
+ }
84
+
85
+ async function main(){
86
+ const args = parseArgs(process.argv);
87
+ const rng = makeRng(args.seed);
88
+ const raw = await fs.readFile(args.in, 'utf8');
89
+ const lines = raw.split(/\r?\n/);
90
+ const out = new Array(lines.length);
91
+
92
+ for (let i = 0; i < lines.length; i++){
93
+ const line = lines[i];
94
+ if (!line || line.trim().length === 0) { out[i] = line; continue; }
95
+ if (/^\s*#/.test(line)) { out[i] = line; continue; }
96
+
97
+ // Preserve header line unmodified
98
+ if (i === 0 && /^\s*run\s*,\s*step\s*,\s*metric\s*,\s*value\s*,\s*stderr\s*$/i.test(line)) {
99
+ out[i] = line; continue;
100
+ }
101
+
102
+ const cols = line.split(',');
103
+ if (cols.length < 4) { out[i] = line; continue; }
104
+
105
+ const [run, stepStr, metric, valueStr, stderrStr = ''] = cols;
106
+ const trimmedMetric = (metric || '').trim();
107
+ const valueNum = Number((valueStr || '').trim());
108
+
109
+ if (!Number.isFinite(valueNum)) { out[i] = line; continue; }
110
+
111
+ const jittered = jitterValue(trimmedMetric, valueNum, args.amount, rng);
112
+ const valueOut = formatNumberLike(valueStr, jittered);
113
+
114
+ // Reassemble with original column count and positions
115
+ const result = [run, stepStr, metric, valueOut, stderrStr].join(',');
116
+ out[i] = result;
117
+ }
118
+
119
+ const finalText = out.join('\n');
120
+ await fs.writeFile(args.out, finalText, 'utf8');
121
+ const relIn = path.relative(process.cwd(), args.in);
122
+ const relOut = path.relative(process.cwd(), args.out);
123
+ console.log(`Jittered data written: ${relOut} (from ${relIn})`);
124
+ }
125
+
126
+ main().catch(err => {
127
+ console.error(err?.stack || String(err));
128
+ process.exit(1);
129
+ });
app/scripts/latex-importer/README.md ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LaTeX Importer
2
+
3
+ Complete LaTeX to MDX (Markdown + JSX) importer optimized for Astro with advanced support for references, interactive equations, and components.
4
+
5
+ ## 🚀 Quick Start
6
+
7
+ ```bash
8
+ # Complete LaTeX → MDX conversion with all features
9
+ node index.mjs
10
+
11
+ # For step-by-step debugging
12
+ node latex-converter.mjs # LaTeX → Markdown
13
+ node mdx-converter.mjs # Markdown → MDX
14
+ ```
15
+
16
+ ## 📁 Structure
17
+
18
+ ```
19
+ latex-importer/
20
+ ├── index.mjs # Complete LaTeX → MDX pipeline
21
+ ├── latex-converter.mjs # LaTeX → Markdown with Pandoc
22
+ ├── mdx-converter.mjs # Markdown → MDX with Astro components
23
+ ├── reference-preprocessor.mjs # LaTeX references cleanup
24
+ ├── post-processor.mjs # Markdown post-processing
25
+ ├── bib-cleaner.mjs # Bibliography cleaner
26
+ ├── filters/
27
+ │ └── equation-ids.lua # Pandoc filter for KaTeX equations
28
+ ├── input/ # LaTeX sources
29
+ │ ├── main.tex
30
+ │ ├── main.bib
31
+ │ └── sections/
32
+ └── output/ # Results
33
+ ├── main.md # Intermediate Markdown
34
+ └── main.mdx # Final MDX for Astro
35
+ ```
36
+
37
+ ## ✨ Key Features
38
+
39
+ ### 🎯 **Smart References**
40
+ - **Invisible anchors**: Automatic conversion of `\label{}` to `<span id="..." style="position: absolute;"></span>`
41
+ - **Clean links**: Identifier cleanup (`:` → `-`, removing prefixes `sec:`, `fig:`, `eq:`)
42
+ - **Cross-references**: Full support for `\ref{}` with functional links
43
+
44
+ ### 🧮 **Interactive Equations**
45
+ - **KaTeX IDs**: Conversion of `\label{eq:...}` to `\htmlId{id}{equation}`
46
+ - **Equation references**: Clickable links to mathematical equations
47
+ - **Advanced KaTeX support**: `trust: true` configuration for `\htmlId{}`
48
+
49
+ ### 🎨 **Automatic Styling**
50
+ - **Highlights**: `\highlight{text}` → `<span class="highlight">text</span>`
51
+ - **Auto cleanup**: Removal of numbering `(1)`, `(2)`, etc.
52
+ - **Astro components**: Images → `Figure` with automatic imports
53
+
54
+ ### 🔧 **Robust Pipeline**
55
+ - **LaTeX preprocessor**: Reference cleanup before Pandoc
56
+ - **Lua filter**: Equation processing in Pandoc AST
57
+ - **Post-processor**: Markdown cleanup and optimization
58
+ - **MDX converter**: Final transformation with Astro components
59
+
60
+ ## 📊 Example Workflow
61
+
62
+ ```bash
63
+ # 1. Prepare LaTeX sources
64
+ cp my-paper/* input/
65
+
66
+ # 2. Complete automatic conversion
67
+ node index.mjs
68
+
69
+ # 3. Generated results
70
+ ls output/
71
+ # → main.md (Intermediate Markdown)
72
+ # → main.mdx (Final MDX for Astro)
73
+ # → assets/image/ (extracted images)
74
+ ```
75
+
76
+ ### 📋 Conversion Result
77
+
78
+ The pipeline generates an MDX file optimized for Astro with:
79
+
80
+ ```mdx
81
+ ---
82
+ title: "Your Article Title"
83
+ description: "Generated from LaTeX"
84
+ ---
85
+
86
+ import Figure from '../components/Figure.astro';
87
+ import figure1 from '../assets/image/figure1.png';
88
+
89
+ ## Section with invisible anchor
90
+ <span id="introduction" style="position: absolute;"></span>
91
+
92
+ Here is some text with <span class="highlight">highlighted words</span>.
93
+
94
+ Reference to an interactive [equation](#equation-name).
95
+
96
+ Equation with KaTeX ID:
97
+ $$\htmlId{equation-name}{E = mc^2}$$
98
+
99
+ <Figure src={figure1} alt="Description" />
100
+ ```
101
+
102
+ ## ⚙️ Required Astro Configuration
103
+
104
+ To use equations with IDs, add to `astro.config.mjs`:
105
+
106
+ ```javascript
107
+ import rehypeKatex from 'rehype-katex';
108
+
109
+ export default defineConfig({
110
+ markdown: {
111
+ rehypePlugins: [
112
+ [rehypeKatex, { trust: true }], // ← Important for \htmlId{}
113
+ ],
114
+ },
115
+ });
116
+ ```
117
+
118
+ ## 🛠️ Prerequisites
119
+
120
+ - **Node.js** with ESM support
121
+ - **Pandoc** (`brew install pandoc`)
122
+ - **Astro** to use the generated MDX
123
+
124
+ ## 🎯 Technical Architecture
125
+
126
+ ### 4-Stage Pipeline
127
+
128
+ 1. **LaTeX Preprocessing** (`reference-preprocessor.mjs`)
129
+ - Cleanup of `\label{}` and `\ref{}`
130
+ - Conversion `\highlight{}` → CSS spans
131
+ - Removal of prefixes and problematic characters
132
+
133
+ 2. **Pandoc + Lua Filter** (`equation-ids.lua`)
134
+ - LaTeX → Markdown conversion with `gfm+tex_math_dollars+raw_html`
135
+ - Equation processing: `\label{eq:name}` → `\htmlId{name}{equation}`
136
+ - Automatic image extraction
137
+
138
+ 3. **Markdown Post-processing** (`post-processor.mjs`)
139
+ - KaTeX, Unicode, grouping commands cleanup
140
+ - Attribute correction with `:`
141
+ - Code snippet injection
142
+
143
+ 4. **MDX Conversion** (`mdx-converter.mjs`)
144
+ - Images transformation → `Figure`
145
+ - HTML span escaping correction
146
+ - Automatic imports generation
147
+ - MDX frontmatter
148
+
149
+ ## 📊 Conversion Statistics
150
+
151
+ For a typical scientific document:
152
+ - **87 labels** detected and processed
153
+ - **48 invisible anchors** created
154
+ - **13 highlight spans** with CSS class
155
+ - **4 equations** with `\htmlId{}` KaTeX
156
+ - **40 images** converted to components
157
+
158
+ ## ✅ Project Status
159
+
160
+ ### 🎉 **Complete Features**
161
+ - ✅ **LaTeX → MDX Pipeline**: Full end-to-end functional conversion
162
+ - ✅ **Cross-document references**: Perfectly functional internal links
163
+ - ✅ **Interactive equations**: KaTeX support with clickable IDs
164
+ - ✅ **Automatic styling**: Highlights and Astro components
165
+ - ✅ **Robustness**: Automatic cleanup of all escaping
166
+ - ✅ **Optimization**: Clean code without unnecessary elements
167
+
168
+ ### 🚀 **Production Ready**
169
+ The toolkit is now **100% operational** for converting complex scientific LaTeX documents to MDX/Astro with all advanced features (references, interactive equations, styling).
app/scripts/latex-importer/bib-cleaner.mjs ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
4
+ import { join, dirname, basename } from 'path';
5
+
6
+ /**
7
+ * Clean a BibTeX file by removing local file references and paths
8
+ * @param {string} inputBibFile - Path to the input .bib file
9
+ * @param {string} outputBibFile - Path to the output cleaned .bib file
10
+ * @returns {boolean} - Success status
11
+ */
12
+ export function cleanBibliography(inputBibFile, outputBibFile) {
13
+ if (!existsSync(inputBibFile)) {
14
+ console.log(' ⚠️ No bibliography file found:', inputBibFile);
15
+ return false;
16
+ }
17
+
18
+ console.log('📚 Cleaning bibliography...');
19
+ let bibContent = readFileSync(inputBibFile, 'utf8');
20
+
21
+ // Remove file paths and local references
22
+ bibContent = bibContent.replace(/file = \{[^}]+\}/g, '');
23
+
24
+ // Remove empty lines created by file removal
25
+ bibContent = bibContent.replace(/,\s*\n\s*\n/g, '\n\n');
26
+ bibContent = bibContent.replace(/,\s*\}/g, '\n}');
27
+
28
+ // Clean up double commas
29
+ bibContent = bibContent.replace(/,,/g, ',');
30
+
31
+ // Remove trailing commas before closing braces
32
+ bibContent = bibContent.replace(/,(\s*\n\s*)\}/g, '$1}');
33
+
34
+ writeFileSync(outputBibFile, bibContent);
35
+ console.log(` 📄 Clean bibliography saved: ${outputBibFile}`);
36
+
37
+ return true;
38
+ }
39
+
40
+ /**
41
+ * CLI for bibliography cleaning
42
+ */
43
+ function main() {
44
+ const args = process.argv.slice(2);
45
+
46
+ if (args.includes('--help') || args.includes('-h')) {
47
+ console.log(`
48
+ 📚 BibTeX Bibliography Cleaner
49
+
50
+ Usage:
51
+ node bib-cleaner.mjs [input.bib] [output.bib]
52
+ node bib-cleaner.mjs --input=input.bib --output=output.bib
53
+
54
+ Options:
55
+ --input=FILE Input .bib file
56
+ --output=FILE Output cleaned .bib file
57
+ --help, -h Show this help
58
+
59
+ Examples:
60
+ # Clean main.bib to clean.bib
61
+ node bib-cleaner.mjs main.bib clean.bib
62
+
63
+ # Using flags
64
+ node bib-cleaner.mjs --input=references.bib --output=clean-refs.bib
65
+ `);
66
+ process.exit(0);
67
+ }
68
+
69
+ let inputFile, outputFile;
70
+
71
+ // Parse command line arguments
72
+ if (args.length >= 2 && !args[0].startsWith('--')) {
73
+ // Positional arguments
74
+ inputFile = args[0];
75
+ outputFile = args[1];
76
+ } else {
77
+ // Named arguments
78
+ for (const arg of args) {
79
+ if (arg.startsWith('--input=')) {
80
+ inputFile = arg.split('=')[1];
81
+ } else if (arg.startsWith('--output=')) {
82
+ outputFile = arg.split('=')[1];
83
+ }
84
+ }
85
+ }
86
+
87
+ if (!inputFile || !outputFile) {
88
+ console.error('❌ Both input and output files are required');
89
+ console.log('Use --help for usage information');
90
+ process.exit(1);
91
+ }
92
+
93
+ const success = cleanBibliography(inputFile, outputFile);
94
+ if (success) {
95
+ console.log('🎉 Bibliography cleaning completed!');
96
+ } else {
97
+ process.exit(1);
98
+ }
99
+ }
100
+
101
+ // Run CLI if called directly
102
+ if (import.meta.url === `file://${process.argv[1]}`) {
103
+ main();
104
+ }
app/scripts/latex-importer/filters/equation-ids.lua ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --[[
2
+ Pandoc Lua filter to add IDs to equations using KaTeX \htmlId syntax
3
+
4
+ This filter processes display math equations and inline math that contain
5
+ \label{} commands, and wraps them with \htmlId{clean-id}{content} for KaTeX.
6
+
7
+ Requirements:
8
+ - KaTeX renderer with trust: true option
9
+ - Equations with \label{} commands in LaTeX
10
+ --]]
11
+
12
+ -- Function to clean identifier strings (remove prefixes and colons)
13
+ function clean_identifier(id_str)
14
+ if id_str and type(id_str) == "string" then
15
+ -- Remove common prefixes and replace colons with dashes
16
+ local clean = id_str
17
+ :gsub("^(eq|equation):", "") -- Remove eq: prefix
18
+ :gsub(":", "-") -- Replace colons with dashes
19
+ :gsub("[^a-zA-Z0-9_-]", "-") -- Replace other problematic chars
20
+ :gsub("-+", "-") -- Collapse multiple dashes
21
+ :gsub("^-", "") -- Remove leading dash
22
+ :gsub("-$", "") -- Remove trailing dash
23
+
24
+ -- Ensure we don't have empty identifiers
25
+ if clean == "" then
26
+ clean = id_str:gsub(":", "-")
27
+ end
28
+
29
+ return clean
30
+ end
31
+ return id_str
32
+ end
33
+
34
+ -- Process Math elements (both inline and display)
35
+ function Math(el)
36
+ local math_content = el.text
37
+
38
+ -- Look for \label{...} commands in the math content
39
+ local label_match = math_content:match("\\label%{([^}]+)%}")
40
+
41
+ if label_match then
42
+ -- Clean the identifier
43
+ local clean_id = clean_identifier(label_match)
44
+
45
+ -- Remove the \label{} command from the math content
46
+ local clean_math = math_content:gsub("\\label%{[^}]+%}", "")
47
+
48
+ -- Clean up any extra whitespace or line breaks that might remain
49
+ clean_math = clean_math:gsub("%s*$", ""):gsub("^%s*", "")
50
+
51
+ -- Handle different equation environments appropriately
52
+ -- For align environments, preserve them as they work with KaTeX
53
+ local has_align = clean_math:match("\\begin%{align%}")
54
+
55
+ if has_align then
56
+ -- For align environments, we keep the structure and add ID as an attribute
57
+ -- KaTeX supports align environments natively
58
+ clean_math = clean_math:gsub("\\begin%{align%}", "\\begin{align}")
59
+ clean_math = clean_math:gsub("\\end%{align%}", "\\end{align}")
60
+ else
61
+ -- Remove other equation environments that don't work well with \htmlId
62
+ clean_math = clean_math:gsub("\\begin%{equation%}", ""):gsub("\\end%{equation%}", "")
63
+ clean_math = clean_math:gsub("\\begin%{equation%*%}", ""):gsub("\\end%{equation%*%}", "")
64
+ clean_math = clean_math:gsub("\\begin%{align%*%}", ""):gsub("\\end%{align%*%}", "")
65
+ end
66
+
67
+ -- Clean up any remaining whitespace
68
+ clean_math = clean_math:gsub("%s*$", ""):gsub("^%s*", "")
69
+
70
+ local new_math
71
+ if has_align then
72
+ -- For align environments, KaTeX doesn't support \htmlId with align
73
+ -- Instead, we add a special marker that the post-processor will convert to a span
74
+ -- This span will serve as an anchor for references
75
+ new_math = "%%ALIGN_ANCHOR_ID{" .. clean_id .. "}%%\n" .. clean_math
76
+ else
77
+ -- For other math, wrap with \htmlId{}
78
+ new_math = "\\htmlId{" .. clean_id .. "}{" .. clean_math .. "}"
79
+ end
80
+
81
+ -- Return new Math element with the updated content
82
+ return pandoc.Math(el.mathtype, new_math)
83
+ end
84
+
85
+ -- Return unchanged if no label found
86
+ return el
87
+ end
88
+
89
+ -- Optional: Process RawInline elements that might contain LaTeX math
90
+ function RawInline(el)
91
+ if el.format == "latex" or el.format == "tex" then
92
+ local content = el.text
93
+
94
+ -- Look for equation environments with labels
95
+ local label_match = content:match("\\label%{([^}]+)%}")
96
+
97
+ if label_match then
98
+ local clean_id = clean_identifier(label_match)
99
+
100
+ -- For raw LaTeX, we might need different handling
101
+ -- This is a simplified approach - adjust based on your needs
102
+ local clean_content = content:gsub("\\label%{[^}]+%}", "")
103
+
104
+ if clean_content:match("\\begin%{equation") or clean_content:match("\\begin%{align") then
105
+ -- For equation environments, we might need to wrap differently
106
+ -- This depends on how your KaTeX setup handles equation environments
107
+ return pandoc.RawInline(el.format, clean_content)
108
+ end
109
+ end
110
+ end
111
+
112
+ return el
113
+ end
114
+
115
+ -- Optional: Process RawBlock elements for display equations
116
+ function RawBlock(el)
117
+ if el.format == "latex" or el.format == "tex" then
118
+ local content = el.text
119
+
120
+ -- Look for equation environments with labels
121
+ local label_match = content:match("\\label%{([^}]+)%}")
122
+
123
+ if label_match then
124
+ local clean_id = clean_identifier(label_match)
125
+ local clean_content = content:gsub("\\label%{[^}]+%}", "")
126
+
127
+ -- For block equations, we might want to preserve the structure
128
+ -- but add the htmlId functionality
129
+ return pandoc.RawBlock(el.format, clean_content)
130
+ end
131
+ end
132
+
133
+ return el
134
+ end
app/scripts/latex-importer/index.mjs ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { join, dirname } from 'path';
4
+ import { fileURLToPath } from 'url';
5
+ import { copyFileSync } from 'fs';
6
+ import { convertLatexToMarkdown } from './latex-converter.mjs';
7
+ import { convertToMdx } from './mdx-converter.mjs';
8
+ import { cleanBibliography } from './bib-cleaner.mjs';
9
+
10
+ const __filename = fileURLToPath(import.meta.url);
11
+ const __dirname = dirname(__filename);
12
+
13
+ // Default configuration
14
+ const DEFAULT_INPUT = join(__dirname, 'input', 'main.tex');
15
+ const DEFAULT_OUTPUT = join(__dirname, 'output');
16
+ const ASTRO_CONTENT_PATH = join(__dirname, '..', '..', 'src', 'content', 'article.mdx');
17
+
18
+ function parseArgs() {
19
+ const args = process.argv.slice(2);
20
+ const config = {
21
+ input: DEFAULT_INPUT,
22
+ output: DEFAULT_OUTPUT,
23
+ clean: false,
24
+ bibOnly: false,
25
+ convertOnly: false,
26
+ mdx: false,
27
+ };
28
+
29
+ for (const arg of args) {
30
+ if (arg.startsWith('--input=')) {
31
+ config.input = arg.split('=')[1];
32
+ } else if (arg.startsWith('--output=')) {
33
+ config.output = arg.split('=')[1];
34
+ } else if (arg === '--clean') {
35
+ config.clean = true;
36
+ } else if (arg === '--bib-only') {
37
+ config.bibOnly = true;
38
+ } else if (arg === '--convert-only') {
39
+ config.convertOnly = true;
40
+ }
41
+ }
42
+
43
+ return config;
44
+ }
45
+
46
+ function showHelp() {
47
+ console.log(`
48
+ 🚀 LaTeX to Markdown Toolkit
49
+
50
+ Usage:
51
+ node index.mjs [options]
52
+
53
+ Options:
54
+ --input=PATH Input LaTeX file (default: input/main.tex)
55
+ --output=PATH Output directory (default: output/)
56
+ --clean Clean output directory before processing
57
+ --bib-only Only clean bibliography file
58
+ --convert-only Only convert LaTeX to Markdown (skip bib cleaning)
59
+ --help, -h Show this help
60
+
61
+ Examples:
62
+ # Full conversion with bibliography cleaning
63
+ node index.mjs --clean
64
+
65
+ # Only clean bibliography
66
+ node index.mjs --bib-only --input=paper.tex --output=clean/
67
+
68
+ # Only convert LaTeX (use existing clean bibliography)
69
+ node index.mjs --convert-only
70
+
71
+ # Custom paths
72
+ node index.mjs --input=../paper/main.tex --output=../results/ --clean
73
+ `);
74
+ }
75
+
76
+ function main() {
77
+ const args = process.argv.slice(2);
78
+
79
+ if (args.includes('--help') || args.includes('-h')) {
80
+ showHelp();
81
+ process.exit(0);
82
+ }
83
+
84
+ const config = parseArgs();
85
+
86
+ console.log('🚀 LaTeX to Markdown Toolkit');
87
+ console.log('==============================');
88
+
89
+ try {
90
+ if (config.bibOnly) {
91
+ // Only clean bibliography
92
+ console.log('📚 Bibliography cleaning mode');
93
+ const bibInput = config.input.replace('.tex', '.bib');
94
+ const bibOutput = join(config.output, 'main.bib');
95
+
96
+ cleanBibliography(bibInput, bibOutput);
97
+ console.log('🎉 Bibliography cleaning completed!');
98
+
99
+ } else if (config.convertOnly) {
100
+ // Only convert LaTeX
101
+ console.log('📄 Conversion only mode');
102
+ convertLatexToMarkdown(config.input, config.output);
103
+
104
+ } else {
105
+ // Full workflow
106
+ console.log('🔄 Full conversion workflow');
107
+ convertLatexToMarkdown(config.input, config.output);
108
+
109
+ // Convert to MDX if requested
110
+ const markdownFile = join(config.output, 'main.md');
111
+ const mdxFile = join(config.output, 'main.mdx');
112
+
113
+ console.log('📝 Converting Markdown to MDX...');
114
+ convertToMdx(markdownFile, mdxFile);
115
+
116
+ // Copy MDX to Astro content directory
117
+ console.log('📋 Copying MDX to Astro content directory...');
118
+ try {
119
+ copyFileSync(mdxFile, ASTRO_CONTENT_PATH);
120
+ console.log(` ✅ Copied to ${ASTRO_CONTENT_PATH}`);
121
+ } catch (error) {
122
+ console.warn(` ⚠️ Failed to copy MDX to Astro: ${error.message}`);
123
+ }
124
+ }
125
+
126
+ } catch (error) {
127
+ console.error('❌ Error:', error.message);
128
+ process.exit(1);
129
+ }
130
+ }
131
+
132
+ // Export functions for use as module
133
+ export { convertLatexToMarkdown, cleanBibliography };
134
+
135
+ // Run CLI if called directly
136
+ if (import.meta.url === `file://${process.argv[1]}`) {
137
+ main();
138
+ }
app/scripts/latex-importer/latex-converter.mjs ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { execSync } from 'child_process';
4
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
5
+ import { join, dirname, basename } from 'path';
6
+ import { fileURLToPath } from 'url';
7
+ import { cleanBibliography } from './bib-cleaner.mjs';
8
+ import { postProcessMarkdown } from './post-processor.mjs';
9
+ import { preprocessLatexReferences } from './reference-preprocessor.mjs';
10
+
11
+ const __filename = fileURLToPath(import.meta.url);
12
+ const __dirname = dirname(__filename);
13
+
14
+ // Configuration
15
+ const DEFAULT_INPUT = join(__dirname, 'input', 'main.tex');
16
+ const DEFAULT_OUTPUT = join(__dirname, 'output');
17
+
18
+ function parseArgs() {
19
+ const args = process.argv.slice(2);
20
+ const config = {
21
+ input: DEFAULT_INPUT,
22
+ output: DEFAULT_OUTPUT,
23
+ clean: false
24
+ };
25
+
26
+ for (const arg of args) {
27
+ if (arg.startsWith('--input=')) {
28
+ config.input = arg.split('=')[1];
29
+ } else if (arg.startsWith('--output=')) {
30
+ config.output = arg.split('=')[1];
31
+ } else if (arg === '--clean') {
32
+ config.clean = true;
33
+ }
34
+ }
35
+
36
+ return config;
37
+ }
38
+
39
+ function ensureDirectory(dir) {
40
+ if (!existsSync(dir)) {
41
+ mkdirSync(dir, { recursive: true });
42
+ }
43
+ }
44
+
45
+ function cleanDirectory(dir) {
46
+ if (existsSync(dir)) {
47
+ execSync(`rm -rf "${dir}"/*`, { stdio: 'inherit' });
48
+ }
49
+ }
50
+
51
+ function preprocessLatexFile(inputFile, outputDir) {
52
+ const inputDir = dirname(inputFile);
53
+ const tempFile = join(outputDir, 'temp_main.tex');
54
+
55
+ console.log('🔄 Preprocessing LaTeX file to resolve \\input commands...');
56
+
57
+ let content = readFileSync(inputFile, 'utf8');
58
+
59
+ // Remove problematic commands that break pandoc
60
+ console.log('🧹 Cleaning problematic LaTeX constructs...');
61
+
62
+ // Fix citation issues - but not in citation keys
63
+ content = content.replace(/\$p_0\$(?![A-Za-z])/g, 'p0');
64
+
65
+ // Convert complex math environments to simple delimiters
66
+ content = content.replace(/\$\$\\begin\{equation\*\}/g, '$$');
67
+ content = content.replace(/\\end\{equation\*\}\$\$/g, '$$');
68
+ content = content.replace(/\\begin\{equation\*\}/g, '$$');
69
+ content = content.replace(/\\end\{equation\*\}/g, '$$');
70
+ // Keep align environments intact for KaTeX support
71
+ // Protect align environments by temporarily replacing them before cleaning & operators
72
+ const alignBlocks = [];
73
+ content = content.replace(/\\begin\{align\}([\s\S]*?)\\end\{align\}/g, (match, alignContent) => {
74
+ alignBlocks.push(match);
75
+ return `__ALIGN_BLOCK_${alignBlocks.length - 1}__`;
76
+ });
77
+
78
+ // Now remove & operators from non-align content (outside align environments)
79
+ content = content.replace(/&=/g, '=');
80
+ content = content.replace(/&/g, '');
81
+
82
+ // Restore align blocks with their & operators intact
83
+ alignBlocks.forEach((block, index) => {
84
+ content = content.replace(`__ALIGN_BLOCK_${index}__`, block);
85
+ });
86
+
87
+ // Convert LaTeX citations to Pandoc format
88
+ content = content.replace(/\\cite[tp]?\{([^}]+)\}/g, (match, citations) => {
89
+ // Handle multiple citations separated by commas - all become simple @citations
90
+ return citations.split(',').map(cite => `@${cite.trim()}`).join(', ');
91
+ });
92
+
93
+ // Handle complex \textsc with nested math - extract and simplify (but not in command definitions)
94
+ content = content.replace(/\\textsc\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, content_inside, offset) => {
95
+ // Skip if this is inside a \newcommand or similar definition
96
+ const before = content.substring(Math.max(0, offset - 50), offset);
97
+ if (before.includes('\\newcommand') || before.includes('\\renewcommand') || before.includes('\\def')) {
98
+ return match; // Keep original
99
+ }
100
+
101
+ // Remove math delimiters inside textsc for simplification
102
+ const simplified = content_inside.replace(/\\\([^)]+\\\)/g, 'MATHEXPR');
103
+ return `\\text{${simplified}}`;
104
+ });
105
+
106
+ // Remove complex custom commands that pandoc can't handle
107
+ content = content.replace(/\\input\{snippets\/[^}]+\}/g, '% Code snippet removed');
108
+
109
+ // Find all \input{} commands (but skip commented ones)
110
+ const inputRegex = /^([^%]*?)\\input\{([^}]+)\}/gm;
111
+ let match;
112
+
113
+ while ((match = inputRegex.exec(content)) !== null) {
114
+ const beforeInput = match[1];
115
+ const inputPath = match[2];
116
+
117
+ // Skip if the \input is commented (% appears before \input on the line)
118
+ if (beforeInput.includes('%')) {
119
+ continue;
120
+ }
121
+ let fullPath;
122
+
123
+ // Skip only problematic files, let Pandoc handle macros
124
+ if (inputPath.includes('snippets/')) {
125
+ console.log(` Skipping: ${inputPath}`);
126
+ content = content.replace(`\\input{${inputPath}}`, `% Skipped: ${inputPath}`);
127
+ continue;
128
+ }
129
+
130
+ // Handle paths with or without .tex extension
131
+ if (inputPath.endsWith('.tex')) {
132
+ fullPath = join(inputDir, inputPath);
133
+ } else {
134
+ fullPath = join(inputDir, inputPath + '.tex');
135
+ }
136
+
137
+ if (existsSync(fullPath)) {
138
+ console.log(` Including: ${inputPath}`);
139
+ let includedContent = readFileSync(fullPath, 'utf8');
140
+
141
+ // Clean included content too
142
+ includedContent = includedContent.replace(/\$p_0\$/g, 'p0');
143
+ includedContent = includedContent.replace(/\\input\{snippets\/[^}]+\}/g, '% Code snippet removed');
144
+
145
+ // Handle complex \textsc in included content
146
+ includedContent = includedContent.replace(/\\textsc\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, content_inside, offset) => {
147
+ // Skip if this is inside a \newcommand or similar definition
148
+ const before = includedContent.substring(Math.max(0, offset - 50), offset);
149
+ if (before.includes('\\newcommand') || before.includes('\\renewcommand') || before.includes('\\def')) {
150
+ return match; // Keep original
151
+ }
152
+
153
+ const simplified = content_inside.replace(/\\\([^)]+\\\)/g, 'MATHEXPR');
154
+ return `\\text{${simplified}}`;
155
+ });
156
+
157
+ // Apply same align-preserving logic to included content
158
+ const alignBlocksIncluded = [];
159
+ includedContent = includedContent.replace(/\\begin\{align\}([\s\S]*?)\\end\{align\}/g, (match, alignContent) => {
160
+ alignBlocksIncluded.push(match);
161
+ return `__ALIGN_BLOCK_${alignBlocksIncluded.length - 1}__`;
162
+ });
163
+
164
+ // Remove alignment operators from non-align content in included files
165
+ includedContent = includedContent.replace(/&=/g, '=');
166
+ includedContent = includedContent.replace(/&/g, '');
167
+
168
+ // Restore align blocks with their & operators intact
169
+ alignBlocksIncluded.forEach((block, index) => {
170
+ includedContent = includedContent.replace(`__ALIGN_BLOCK_${index}__`, block);
171
+ });
172
+
173
+ // Convert math environments in included content
174
+ includedContent = includedContent.replace(/\$\$\\begin\{equation\*\}/g, '$$');
175
+ includedContent = includedContent.replace(/\\end\{equation\*\}\$\$/g, '$$');
176
+ includedContent = includedContent.replace(/\\begin\{equation\*\}/g, '$$');
177
+ includedContent = includedContent.replace(/\\end\{equation\*\}/g, '$$');
178
+
179
+ // Convert citations in included content
180
+ includedContent = includedContent.replace(/\\cite[tp]?\{([^}]+)\}/g, (match, citations) => {
181
+ return citations.split(',').map(cite => `@${cite.trim()}`).join(', ');
182
+ });
183
+
184
+ content = content.replace(`\\input{${inputPath}}`, includedContent);
185
+ } else {
186
+ console.log(` ⚠️ File not found: ${fullPath} (skipping)`);
187
+ content = content.replace(`\\input{${inputPath}}`, `% File not found: ${inputPath}`);
188
+ }
189
+ }
190
+
191
+ // Apply reference preprocessing AFTER input inclusion to ensure all references are captured
192
+ console.log('🔧 Preprocessing LaTeX references for MDX compatibility...');
193
+ const referenceResult = preprocessLatexReferences(content);
194
+ content = referenceResult.content;
195
+
196
+ // Write the preprocessed file
197
+ writeFileSync(tempFile, content);
198
+ return tempFile;
199
+ }
200
+
201
+ function processBibliography(inputFile, outputDir) {
202
+ const bibFile = join(dirname(inputFile), 'main.bib');
203
+ const outputBibFile = join(outputDir, 'main.bib');
204
+
205
+ if (!existsSync(bibFile)) {
206
+ console.log(' ⚠️ No bibliography file found');
207
+ return null;
208
+ }
209
+
210
+ const success = cleanBibliography(bibFile, outputBibFile);
211
+ return success ? outputBibFile : null;
212
+ }
213
+
214
+ export function convertLatexToMarkdown(inputFile, outputDir) {
215
+ console.log('🚀 Simple LaTeX to Markdown Converter');
216
+ console.log(`📁 Input: ${inputFile}`);
217
+ console.log(`📁 Output: ${outputDir}`);
218
+
219
+ // Check if input file exists
220
+ if (!existsSync(inputFile)) {
221
+ console.error(`❌ Input file not found: ${inputFile}`);
222
+ process.exit(1);
223
+ }
224
+
225
+ // Ensure output directory exists
226
+ ensureDirectory(outputDir);
227
+
228
+ try {
229
+ // Check if pandoc is available
230
+ execSync('pandoc --version', { stdio: 'pipe' });
231
+ } catch (error) {
232
+ console.error('❌ Pandoc not found. Please install it: brew install pandoc');
233
+ process.exit(1);
234
+ }
235
+
236
+ // Clean and copy bibliography
237
+ const cleanBibFile = processBibliography(inputFile, outputDir);
238
+
239
+ // Preprocess the LaTeX file to resolve \input commands
240
+ const preprocessedFile = preprocessLatexFile(inputFile, outputDir);
241
+
242
+ const inputFileName = basename(inputFile, '.tex');
243
+ const outputFile = join(outputDir, `${inputFileName}.md`);
244
+
245
+ try {
246
+ console.log('📄 Converting with Pandoc...');
247
+
248
+ // Enhanced pandoc conversion - use tex_math_dollars for KaTeX compatibility
249
+ const bibOption = cleanBibFile ? `--bibliography="${cleanBibFile}"` : '';
250
+
251
+ // Use gfm+tex_math_dollars for simple $ delimiters compatible with KaTeX
252
+ const mediaDir = join(outputDir, 'assets', 'image');
253
+ ensureDirectory(mediaDir);
254
+ const inputDir = dirname(inputFile);
255
+ const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
256
+ const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars+raw_html --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;
257
+
258
+ console.log(` Running: ${pandocCommand}`);
259
+ execSync(pandocCommand, { stdio: 'pipe' });
260
+
261
+ // Clean up temp file
262
+ execSync(`rm "${preprocessedFile}"`, { stdio: 'pipe' });
263
+
264
+ // Post-processing to fix KaTeX incompatible constructions
265
+ let markdownContent = readFileSync(outputFile, 'utf8');
266
+
267
+ // Use modular post-processor with code injection
268
+ markdownContent = postProcessMarkdown(markdownContent, inputDir);
269
+
270
+ writeFileSync(outputFile, markdownContent);
271
+
272
+ console.log(`✅ Conversion completed: ${outputFile}`);
273
+
274
+ // Show file size
275
+ const stats = execSync(`wc -l "${outputFile}"`, { encoding: 'utf8' });
276
+ const lines = stats.trim().split(' ')[0];
277
+ console.log(`📊 Result: ${lines} lines written`);
278
+
279
+ } catch (error) {
280
+ console.error('❌ Pandoc conversion failed:');
281
+ console.error(error.message);
282
+ // Clean up temp file on error
283
+ try {
284
+ execSync(`rm "${preprocessedFile}"`, { stdio: 'pipe' });
285
+ } catch { }
286
+ process.exit(1);
287
+ }
288
+ }
289
+
290
+ function main() {
291
+ const config = parseArgs();
292
+
293
+ if (config.clean) {
294
+ console.log('🧹 Cleaning output directory...');
295
+ cleanDirectory(config.output);
296
+ }
297
+
298
+ convertLatexToMarkdown(config.input, config.output);
299
+
300
+ console.log('🎉 Simple conversion completed!');
301
+ }
302
+
303
+ // Show help if requested
304
+ if (process.argv.includes('--help') || process.argv.includes('-h')) {
305
+ console.log(`
306
+ 🚀 Simple LaTeX to Markdown Converter
307
+
308
+ Usage:
309
+ node scripts/simple-latex-to-markdown.mjs [options]
310
+
311
+ Options:
312
+ --input=PATH Input LaTeX file (default: latex-converter/input-example/main.tex)
313
+ --output=PATH Output directory (default: output/)
314
+ --clean Clean output directory before conversion
315
+ --help, -h Show this help
316
+
317
+ Examples:
318
+ # Basic conversion
319
+ node scripts/simple-latex-to-markdown.mjs
320
+
321
+ # Custom paths
322
+ node scripts/simple-latex-to-markdown.mjs --input=my-paper.tex --output=converted/
323
+
324
+ # Clean output first
325
+ node scripts/simple-latex-to-markdown.mjs --clean
326
+ `);
327
+ process.exit(0);
328
+ }
329
+
330
+ main();
app/scripts/latex-importer/mdx-converter.mjs ADDED
@@ -0,0 +1,896 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
4
+ import { join, dirname, basename, extname } from 'path';
5
+ import { fileURLToPath } from 'url';
6
+ import { extractAndGenerateFrontmatter } from './metadata-extractor.mjs';
7
+
8
+ const __filename = fileURLToPath(import.meta.url);
9
+ const __dirname = dirname(__filename);
10
+
11
+ // Configuration
12
+ const DEFAULT_INPUT = join(__dirname, 'output', 'main.md');
13
+ const DEFAULT_OUTPUT = join(__dirname, 'output', 'main.mdx');
14
+
15
+ function parseArgs() {
16
+ const args = process.argv.slice(2);
17
+ const config = {
18
+ input: DEFAULT_INPUT,
19
+ output: DEFAULT_OUTPUT,
20
+ };
21
+
22
+ for (const arg of args) {
23
+ if (arg.startsWith('--input=')) {
24
+ config.input = arg.substring('--input='.length);
25
+ } else if (arg.startsWith('--output=')) {
26
+ config.output = arg.substring('--output='.length);
27
+ } else if (arg === '--help' || arg === '-h') {
28
+ console.log(`
29
+ 📝 Markdown to MDX Converter
30
+
31
+ Usage:
32
+ node mdx-converter.mjs [options]
33
+
34
+ Options:
35
+ --input=PATH Input Markdown file (default: ${DEFAULT_INPUT})
36
+ --output=PATH Output MDX file (default: ${DEFAULT_OUTPUT})
37
+ --help, -h Show this help
38
+
39
+ Examples:
40
+ # Basic conversion
41
+ node mdx-converter.mjs
42
+
43
+ # Custom paths
44
+ node mdx-converter.mjs --input=article.md --output=article.mdx
45
+ `);
46
+ process.exit(0);
47
+ } else if (!config.input) {
48
+ config.input = arg;
49
+ } else if (!config.output) {
50
+ config.output = arg;
51
+ }
52
+ }
53
+ return config;
54
+ }
55
+
56
+ /**
57
+ * Modular MDX post-processing functions for Astro compatibility
58
+ * Each function handles a specific type of transformation
59
+ */
60
+
61
+ /**
62
+ * Track which Astro components are used during transformations
63
+ */
64
+ const usedComponents = new Set();
65
+
66
+ /**
67
+ * Track individual image imports needed
68
+ */
69
+ const imageImports = new Map(); // src -> varName
70
+
71
+ /**
72
+ * Add required component imports to the frontmatter
73
+ * @param {string} content - MDX content
74
+ * @returns {string} - Content with component imports
75
+ */
76
+ /**
77
+ * Generate a variable name from image path
78
+ * @param {string} src - Image source path
79
+ * @returns {string} - Valid variable name
80
+ */
81
+ function generateImageVarName(src) {
82
+ // Extract filename without extension and make it a valid JS variable
83
+ const filename = src.split('/').pop().replace(/\.[^.]+$/, '');
84
+ return filename.replace(/[^a-zA-Z0-9]/g, '_').replace(/^[0-9]/, 'img_$&');
85
+ }
86
+
87
+ function addComponentImports(content) {
88
+ console.log(' 📦 Adding component and image imports...');
89
+
90
+ let imports = [];
91
+
92
+ // Add component imports
93
+ if (usedComponents.size > 0) {
94
+ const componentImports = Array.from(usedComponents)
95
+ .map(component => `import ${component} from '../components/${component}.astro';`);
96
+ imports.push(...componentImports);
97
+ console.log(` ✅ Importing components: ${Array.from(usedComponents).join(', ')}`);
98
+ }
99
+
100
+ // Add image imports
101
+ if (imageImports.size > 0) {
102
+ const imageImportStatements = Array.from(imageImports.entries())
103
+ .map(([src, varName]) => `import ${varName} from '${src}';`);
104
+ imports.push(...imageImportStatements);
105
+ console.log(` ✅ Importing ${imageImports.size} image(s)`);
106
+ }
107
+
108
+ if (imports.length === 0) {
109
+ console.log(' ℹ️ No imports needed');
110
+ return content;
111
+ }
112
+
113
+ const importBlock = imports.join('\n');
114
+
115
+ // Insert imports after frontmatter
116
+ const frontmatterEnd = content.indexOf('---', 3) + 3;
117
+ if (frontmatterEnd > 2) {
118
+ return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n' + content.slice(frontmatterEnd);
119
+ } else {
120
+ // No frontmatter, add at beginning
121
+ return importBlock + '\n\n' + content;
122
+ }
123
+ }
124
+
125
+
126
+ /**
127
+ * Convert grouped figures (subfigures) to MultiFigure components
128
+ * @param {string} content - MDX content
129
+ * @returns {string} - Content with MultiFigure components for grouped figures
130
+ */
131
+ function convertSubfiguresToMultiFigure(content) {
132
+ console.log(' 🖼️✨ Converting subfigures to MultiFigure components...');
133
+
134
+ let convertedCount = 0;
135
+
136
+ // Pattern to match: <figure> containing multiple <figure> elements with a global caption
137
+ // This matches the LaTeX subfigure pattern that gets converted by Pandoc
138
+ const subfigureGroupPattern = /<figure>\s*((?:<figure>[\s\S]*?<\/figure>\s*){2,})<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;
139
+
140
+ const convertedContent = content.replace(subfigureGroupPattern, (match, figuresMatch, globalCaption) => {
141
+ convertedCount++;
142
+
143
+ // Extract individual figures within the group
144
+ // This pattern is more flexible to handle variations in HTML structure
145
+ const individualFigurePattern = /<figure>\s*<img src="([^"]*)"[^>]*\/>\s*<p>&lt;span id="([^"]*)"[^&]*&gt;&lt;\/span&gt;<\/p>\s*<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;
146
+
147
+ const images = [];
148
+ let figureMatch;
149
+
150
+ while ((figureMatch = individualFigurePattern.exec(figuresMatch)) !== null) {
151
+ const [, src, id, caption] = figureMatch;
152
+
153
+ // Clean the source path (similar to existing transformImages function)
154
+ const cleanSrc = src.replace(/.*\/output\/assets\//, './assets/')
155
+ .replace(/\/Users\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/app\/scripts\/latex-to-markdown\/output\/assets\//, './assets/');
156
+
157
+ // Clean caption text (remove HTML, normalize whitespace)
158
+ const cleanCaption = caption
159
+ .replace(/<[^>]*>/g, '')
160
+ .replace(/\n/g, ' ')
161
+ .replace(/\s+/g, ' ')
162
+ .replace(/'/g, "\\'")
163
+ .trim();
164
+
165
+ // Generate alt text from caption
166
+ const altText = cleanCaption.length > 100
167
+ ? cleanCaption.substring(0, 100) + '...'
168
+ : cleanCaption;
169
+
170
+ // Generate variable name for import
171
+ const varName = generateImageVarName(cleanSrc);
172
+ imageImports.set(cleanSrc, varName);
173
+
174
+ images.push({
175
+ src: varName,
176
+ alt: altText,
177
+ caption: cleanCaption,
178
+ id: id
179
+ });
180
+ }
181
+
182
+ // Clean global caption
183
+ const cleanGlobalCaption = globalCaption
184
+ .replace(/<[^>]*>/g, '')
185
+ .replace(/\n/g, ' ')
186
+ .replace(/\s+/g, ' ')
187
+ .replace(/'/g, "\\'")
188
+ .trim();
189
+
190
+ // Mark MultiFigure component as used
191
+ usedComponents.add('MultiFigure');
192
+
193
+ // Determine layout based on number of images
194
+ let layout = 'auto';
195
+ if (images.length === 2) layout = '2-column';
196
+ else if (images.length === 3) layout = '3-column';
197
+ else if (images.length === 4) layout = '4-column';
198
+
199
+ // Generate MultiFigure component
200
+ const imagesJson = images.map(img =>
201
+ ` {\n src: ${img.src},\n alt: "${img.alt}",\n caption: "${img.caption}",\n id: "${img.id}"\n }`
202
+ ).join(',\n');
203
+
204
+ return `<MultiFigure
205
+ images={[
206
+ ${imagesJson}
207
+ ]}
208
+ layout="${layout}"
209
+ zoomable
210
+ downloadable
211
+ caption="${cleanGlobalCaption}"
212
+ />`;
213
+ });
214
+
215
+ if (convertedCount > 0) {
216
+ console.log(` ✅ Converted ${convertedCount} subfigure group(s) to MultiFigure component(s)`);
217
+ } else {
218
+ console.log(' ℹ️ No subfigure groups found');
219
+ }
220
+
221
+ return convertedContent;
222
+ }
223
+
224
+ /**
225
+ * Transform images to Figure components
226
+ * @param {string} content - MDX content
227
+ * @returns {string} - Content with Figure components
228
+ */
229
+ /**
230
+ * Create Figure component with import
231
+ * @param {string} src - Clean image source
232
+ * @param {string} alt - Alt text
233
+ * @param {string} id - Element ID
234
+ * @param {string} caption - Figure caption
235
+ * @param {string} width - Optional width
236
+ * @returns {string} - Figure component markup
237
+ */
238
+ function createFigureComponent(src, alt = '', id = '', caption = '', width = '') {
239
+ const varName = generateImageVarName(src);
240
+ imageImports.set(src, varName);
241
+ usedComponents.add('Figure');
242
+
243
+ const props = [];
244
+ props.push(`src={${varName}}`);
245
+ props.push('zoomable');
246
+ props.push('downloadable');
247
+ if (id) props.push(`id="${id}"`);
248
+ props.push('layout="fixed"');
249
+ if (alt) props.push(`alt="${alt}"`);
250
+ if (caption) props.push(`caption={'${caption}'}`);
251
+
252
+ return `<Figure\n ${props.join('\n ')}\n/>`;
253
+ }
254
+
255
+ function transformImages(content) {
256
+ console.log(' 🖼️ Transforming images to Figure components with imports...');
257
+
258
+ let hasImages = false;
259
+
260
+ // Helper function to clean source paths
261
+ const cleanSrcPath = (src) => {
262
+ return src.replace(/.*\/output\/assets\//, './assets/')
263
+ .replace(/\/Users\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/[^\/]+\/app\/scripts\/latex-to-markdown\/output\/assets\//, './assets/');
264
+ };
265
+
266
+ // Helper to clean caption text
267
+ const cleanCaption = (caption) => {
268
+ return caption
269
+ .replace(/<[^>]*>/g, '') // Remove HTML tags
270
+ .replace(/\n/g, ' ') // Replace newlines with spaces
271
+ .replace(/\r/g, ' ') // Replace carriage returns with spaces
272
+ .replace(/\s+/g, ' ') // Replace multiple spaces with single space
273
+ .replace(/'/g, "\\'") // Escape quotes
274
+ .trim(); // Trim whitespace
275
+ };
276
+
277
+ // Helper to clean alt text
278
+ const cleanAltText = (alt, maxLength = 100) => {
279
+ const cleaned = alt
280
+ .replace(/<[^>]*>/g, '') // Remove HTML tags
281
+ .replace(/\n/g, ' ') // Replace newlines with spaces
282
+ .replace(/\r/g, ' ') // Replace carriage returns with spaces
283
+ .replace(/\s+/g, ' ') // Replace multiple spaces with single space
284
+ .trim(); // Trim whitespace
285
+
286
+ return cleaned.length > maxLength
287
+ ? cleaned.substring(0, maxLength) + '...'
288
+ : cleaned;
289
+ };
290
+
291
+ // 1. Transform complex HTML figures with style attributes
292
+ content = content.replace(
293
+ /<figure id="([^"]*)">\s*<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*\/>\s*<figcaption>\s*(.*?)\s*<\/figcaption>\s*<\/figure>/gs,
294
+ (match, id, src, style, caption) => {
295
+ const cleanSrc = cleanSrcPath(src);
296
+ const cleanCap = cleanCaption(caption);
297
+ const altText = cleanAltText(cleanCap);
298
+ hasImages = true;
299
+
300
+ return createFigureComponent(cleanSrc, altText, id, cleanCap);
301
+ }
302
+ );
303
+
304
+ // 2. Transform standalone img tags with style
305
+ content = content.replace(
306
+ /<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*(?:alt="([^"]*)")?\s*\/>/g,
307
+ (match, src, style, alt) => {
308
+ const cleanSrc = cleanSrcPath(src);
309
+ const cleanAlt = cleanAltText(alt || 'Figure');
310
+ hasImages = true;
311
+
312
+ return createFigureComponent(cleanSrc, cleanAlt);
313
+ }
314
+ );
315
+
316
+ // 3. Transform images within wrapfigure divs
317
+ content = content.replace(
318
+ /<div class="wrapfigure">\s*r[\d.]+\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>/gs,
319
+ (match, src) => {
320
+ const cleanSrc = cleanSrcPath(src);
321
+ hasImages = true;
322
+
323
+ return createFigureComponent(cleanSrc, 'Figure');
324
+ }
325
+ );
326
+
327
+ // 4. Transform simple HTML figure/img without style
328
+ content = content.replace(
329
+ /<figure id="([^"]*)">\s*<img src="([^"]*)" \/>\s*<figcaption>\s*(.*?)\s*<\/figcaption>\s*<\/figure>/gs,
330
+ (match, id, src, caption) => {
331
+ const cleanSrc = cleanSrcPath(src);
332
+ const cleanCap = cleanCaption(caption);
333
+ const altText = cleanAltText(cleanCap);
334
+ hasImages = true;
335
+
336
+ return createFigureComponent(cleanSrc, altText, id, cleanCap);
337
+ }
338
+ );
339
+
340
+ // 5. Clean up figures with minipage divs
341
+ content = content.replace(
342
+ /<figure id="([^"]*)">\s*<div class="minipage">\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>\s*<figcaption[^>]*>(.*?)<\/figcaption>\s*<\/figure>/gs,
343
+ (match, id, src, caption) => {
344
+ const cleanSrc = cleanSrcPath(src);
345
+ const cleanCap = cleanCaption(caption);
346
+ const altText = cleanAltText(cleanCap);
347
+ hasImages = true;
348
+
349
+ return createFigureComponent(cleanSrc, altText, id, cleanCap);
350
+ }
351
+ );
352
+
353
+ // 6. Transform Pandoc-style images: ![alt](src){#id attr="value"}
354
+ content = content.replace(
355
+ /!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g,
356
+ (match, alt, src, attributes) => {
357
+ const cleanSrc = cleanSrcPath(src);
358
+ const cleanAlt = cleanAltText(alt || 'Figure');
359
+ hasImages = true;
360
+
361
+ let id = '';
362
+ if (attributes) {
363
+ const idMatch = attributes.match(/#([\w-]+)/);
364
+ if (idMatch) id = idMatch[1];
365
+ }
366
+
367
+ return createFigureComponent(cleanSrc, cleanAlt, id);
368
+ }
369
+ );
370
+
371
+ if (hasImages) {
372
+ console.log(' ✅ Figure components with imports will be created');
373
+ }
374
+
375
+ return content;
376
+ }
377
+
378
+ /**
379
+ * Transform HTML spans with style attributes to appropriate components
380
+ * @param {string} content - MDX content
381
+ * @returns {string} - Content with transformed spans
382
+ */
383
+ function transformStyledSpans(content) {
384
+ console.log(' 🎨 Transforming styled spans...');
385
+
386
+ // Transform HTML spans with style attributes
387
+ content = content.replace(
388
+ /<span style="color: ([^"]+)">(.*?)<\/span>/g,
389
+ (match, color, text) => {
390
+ // Map colors to semantic classes or components
391
+ const colorMap = {
392
+ 'hf2': 'text-hf-secondary',
393
+ 'hf1': 'text-hf-primary'
394
+ };
395
+
396
+ const className = colorMap[color] || `text-${color}`;
397
+ return `<span class="${className}">${text}</span>`;
398
+ }
399
+ );
400
+
401
+ // Transform markdown spans with style attributes: [text]{style="color: color"}
402
+ content = content.replace(
403
+ /\[([^\]]+)\]\{style="color: ([^"]+)"\}/g,
404
+ (match, text, color) => {
405
+ // Map colors to semantic classes or components
406
+ const colorMap = {
407
+ 'hf2': 'text-hf-secondary',
408
+ 'hf1': 'text-hf-primary'
409
+ };
410
+
411
+ const className = colorMap[color] || `text-${color}`;
412
+ return `<span class="${className}">${text}</span>`;
413
+ }
414
+ );
415
+
416
+ return content;
417
+ }
418
+
419
+ /**
420
+ * Transform reference links to proper Astro internal links
421
+ * @param {string} content - MDX content
422
+ * @returns {string} - Content with transformed links
423
+ */
424
+ function fixHtmlEscaping(content) {
425
+ console.log(' 🔧 Fixing HTML escaping in spans...');
426
+
427
+ let fixedCount = 0;
428
+
429
+ // Pattern 1: \<span id="..." style="..."\>\</span\>
430
+ content = content.replace(/\\<span id="([^"]*)" style="([^"]*)"\\>\\<\/span\\>/g, (match, id, style) => {
431
+ fixedCount++;
432
+ // Fix common style issues like "position- absolute;" -> "position: absolute;"
433
+ const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
434
+ return `<span id="${id}" style="${cleanStyle}"></span>`;
435
+ });
436
+
437
+ // Pattern 2: \<span class="..."\>...\</span\>
438
+ content = content.replace(/\\<span class="([^"]*)"\\>([^\\]+)\\<\/span\\>/g, (match, className, text) => {
439
+ fixedCount++;
440
+ // Remove numbering like (1), (2), (3) from highlight spans
441
+ let cleanText = text;
442
+ if (className === 'highlight') {
443
+ cleanText = text.replace(/^\(\d+\)\s*/, '');
444
+ }
445
+ return `<span class="${className}">${cleanText}</span>`;
446
+ });
447
+
448
+ // Pattern 3: HTML-encoded spans in paragraph tags
449
+ // <p>&lt;span id="..." style="..."&gt;&lt;/span&gt;</p>
450
+ content = content.replace(/<p>&lt;span id="([^"]*)" style="([^"]*)"&gt;&lt;\/span&gt;<\/p>/g, (match, id, style) => {
451
+ fixedCount++;
452
+ // Fix common style issues like "position- absolute;" -> "position: absolute;"
453
+ const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
454
+ return `<span id="${id}" style="${cleanStyle}"></span>`;
455
+ });
456
+
457
+ // Pattern 4: HTML-encoded spans with class in paragraph tags
458
+ // <p>&lt;span class="..."&gt;...&lt;/span&gt;</p>
459
+ content = content.replace(/<p>&lt;span class="([^"]*)"&gt;([^&]*)&lt;\/span&gt;<\/p>/g, (match, className, text) => {
460
+ fixedCount++;
461
+ // Remove numbering like (1), (2), (3) from highlight spans
462
+ let cleanText = text;
463
+ if (className === 'highlight') {
464
+ cleanText = text.replace(/^\(\d+\)\s*/, '');
465
+ }
466
+ return `<span class="${className}">${cleanText}</span>`;
467
+ });
468
+
469
+ if (fixedCount > 0) {
470
+ console.log(` ✅ Fixed ${fixedCount} escaped span(s)`);
471
+ }
472
+
473
+ return content;
474
+ }
475
+
476
+ function cleanHighlightNumbering(content) {
477
+ console.log(' 🔢 Removing numbering from highlight spans...');
478
+
479
+ let cleanedCount = 0;
480
+ // Clean numbering from non-escaped highlight spans too
481
+ content = content.replace(/<span class="highlight">(\(\d+\)\s*)([^<]+)<\/span>/g, (match, numbering, text) => {
482
+ cleanedCount++;
483
+ return `<span class="highlight">${text}</span>`;
484
+ });
485
+
486
+ if (cleanedCount > 0) {
487
+ console.log(` ✅ Removed numbering from ${cleanedCount} highlight span(s)`);
488
+ }
489
+
490
+ return content;
491
+ }
492
+
493
+ function transformReferenceLinks(content) {
494
+ console.log(' 🔗 Transforming reference links...');
495
+
496
+ // Transform Pandoc reference links: [text](#ref){reference-type="ref" reference="ref"}
497
+ return content.replace(
498
+ /\[([^\]]+)\]\((#[^)]+)\)\{[^}]*reference[^}]*\}/g,
499
+ (match, text, href) => {
500
+ return `[${text}](${href})`;
501
+ }
502
+ );
503
+ }
504
+
505
+
506
+ /**
507
+ * Fix frontmatter and ensure proper MDX format
508
+ * @param {string} content - MDX content
509
+ * @param {string} latexContent - Original LaTeX content for metadata extraction
510
+ * @returns {string} - Content with proper frontmatter
511
+ */
512
+ function ensureFrontmatter(content, latexContent = '') {
513
+ console.log(' 📄 Ensuring proper frontmatter...');
514
+
515
+ if (!content.startsWith('---')) {
516
+ let frontmatter;
517
+
518
+ if (latexContent) {
519
+ // Extract metadata from LaTeX using dedicated module
520
+ frontmatter = extractAndGenerateFrontmatter(latexContent);
521
+ console.log(' ✅ Generated frontmatter from LaTeX metadata');
522
+ } else {
523
+ // Fallback frontmatter
524
+ const currentDate = new Date().toLocaleDateString('en-US', {
525
+ year: 'numeric',
526
+ month: 'short',
527
+ day: '2-digit'
528
+ });
529
+ frontmatter = `---
530
+ title: "Research Article"
531
+ published: "${currentDate}"
532
+ tableOfContentsAutoCollapse: true
533
+ ---
534
+
535
+ `;
536
+ console.log(' ✅ Generated basic frontmatter');
537
+ }
538
+
539
+ return frontmatter + content;
540
+ }
541
+
542
+ return content;
543
+ }
544
+
545
+ /**
546
+ * Fix mixed math delimiters like $`...`$ or `...`$
547
+ * @param {string} content - MDX content
548
+ * @returns {string} - Content with fixed math delimiters
549
+ */
550
+ function fixMixedMathDelimiters(content) {
551
+ console.log(' 🔧 Fixing mixed math delimiters...');
552
+
553
+ let fixedCount = 0;
554
+
555
+ // Fix patterns like $`...`$ (mixed delimiters)
556
+ content = content.replace(/\$`([^`]*)`\$/g, (match, mathContent) => {
557
+ fixedCount++;
558
+ return `$${mathContent}$`;
559
+ });
560
+
561
+ // Fix patterns like `...`$ (backtick start, dollar end)
562
+ content = content.replace(/`([^`]*)`\$/g, (match, mathContent) => {
563
+ fixedCount++;
564
+ return `$${mathContent}$`;
565
+ });
566
+
567
+ // Fix patterns like $`...` (dollar start, backtick end - less common)
568
+ content = content.replace(/\$`([^`]*)`(?!\$)/g, (match, mathContent) => {
569
+ fixedCount++;
570
+ return `$${mathContent}$`;
571
+ });
572
+
573
+ if (fixedCount > 0) {
574
+ console.log(` ✅ Fixed ${fixedCount} mixed math delimiter(s)`);
575
+ }
576
+
577
+ return content;
578
+ }
579
+
580
+ /**
581
+ * Clean up orphaned math delimiters and fix mixed content
582
+ * @param {string} content - MDX content
583
+ * @returns {string} - Content with cleaned math blocks
584
+ */
585
+ function cleanOrphanedMathDelimiters(content) {
586
+ console.log(' 🧹 Cleaning orphaned math delimiters...');
587
+ console.log(' 🔍 Content length:', content.length, 'chars');
588
+
589
+ let fixedCount = 0;
590
+
591
+ // Fix orphaned $$ that are alone on lines (but not part of display math blocks)
592
+ // Only remove $$ that appear alone without corresponding closing $$
593
+ content = content.replace(/^\$\$\s*$(?!\s*[\s\S]*?\$\$)/gm, () => {
594
+ fixedCount++;
595
+ return '';
596
+ });
597
+
598
+ // Fix backticks inside $$....$$ blocks (Pandoc artifact)
599
+ const mathMatches = content.match(/\$\$([\s\S]*?)\$\$/g);
600
+ console.log(` 🔍 Found ${mathMatches ? mathMatches.length : 0} math blocks`);
601
+
602
+ content = content.replace(/\$\$([\s\S]*?)\$\$/g, (match, mathContent) => {
603
+ // More aggressive: remove ALL single backticks in math blocks (they shouldn't be there)
604
+ let cleanedMath = mathContent;
605
+
606
+ // Count backticks before
607
+ const backticksBefore = (mathContent.match(/`/g) || []).length;
608
+
609
+ if (backticksBefore > 0) {
610
+ console.log(` 🔧 Found math block with ${backticksBefore} backtick(s)`);
611
+ }
612
+
613
+ // Remove all isolated backticks (not in pairs)
614
+ cleanedMath = cleanedMath.replace(/`/g, '');
615
+
616
+ const backticksAfter = (cleanedMath.match(/`/g) || []).length;
617
+
618
+ if (backticksBefore > 0) {
619
+ fixedCount++;
620
+ console.log(` 🔧 Removed ${backticksBefore} backtick(s) from math block`);
621
+ return `$$${cleanedMath}$$`;
622
+ }
623
+ return match;
624
+ });
625
+
626
+ // Fix escaped align in math blocks: \begin{align} -> \begin{align}
627
+ content = content.replace(/\\begin\{align\}/g, (match) => {
628
+ fixedCount++;
629
+ return '\\begin{align}';
630
+ });
631
+
632
+ content = content.replace(/\\end\{align\}/g, (match) => {
633
+ fixedCount++;
634
+ return '\\end{align}';
635
+ });
636
+
637
+ // Fix cases where text gets mixed with math blocks
638
+ // Pattern: ``` math ... ``` text ``` math
639
+ content = content.replace(/``` math\s*\n([\s\S]*?)\n```\s*([^`\n]*?)\s*``` math/g, (match, math1, text, math2) => {
640
+ if (text.trim().length > 0 && !text.includes('```')) {
641
+ fixedCount++;
642
+ return '```' + ' math\n' + math1 + '\n```\n\n' + text.trim() + '\n\n```' + ' math';
643
+ }
644
+ return match;
645
+ });
646
+
647
+ if (fixedCount > 0) {
648
+ console.log(` ✅ Fixed ${fixedCount} orphaned math delimiter(s)`);
649
+ }
650
+
651
+ return content;
652
+ }
653
+
654
+ /**
655
+ * Clean newlines from single-dollar math blocks ($...$) ONLY
656
+ * @param {string} content - MDX content
657
+ * @returns {string} - Content with cleaned math blocks
658
+ */
659
+ function cleanSingleLineMathNewlines(content) {
660
+ console.log(' 🔢 Cleaning newlines in single-dollar math blocks ($...$)...');
661
+
662
+ let cleanedCount = 0;
663
+
664
+ // ULTRA STRICT: Only target single dollar blocks ($...$) that contain newlines
665
+ // Use dotall flag (s) to match newlines with .*, and ensure we don't match $$
666
+ const cleanedContent = content.replace(/\$(?!\$)([\s\S]*?)\$(?!\$)/g, (match, mathContent) => {
667
+ // Only process if the content contains newlines
668
+ if (mathContent.includes('\n')) {
669
+ cleanedCount++;
670
+
671
+ // Remove ALL newlines and carriage returns, normalize whitespace
672
+ const cleanedMath = mathContent
673
+ .replace(/\n+/g, ' ') // Replace all newlines with spaces
674
+ .replace(/\r+/g, ' ') // Replace carriage returns with spaces
675
+ .replace(/\s+/g, ' ') // Normalize multiple spaces to single
676
+ .trim(); // Remove leading/trailing spaces
677
+
678
+ return `$${cleanedMath}$`;
679
+ }
680
+ return match; // Keep original if no newlines
681
+ });
682
+
683
+ if (cleanedCount > 0) {
684
+ console.log(` ✅ Cleaned ${cleanedCount} single-dollar math block(s) with newlines`);
685
+ }
686
+
687
+ return cleanedContent;
688
+ }
689
+
690
+ /**
691
+ * Add proper line breaks around display math blocks ($$...$$)
692
+ * @param {string} content - MDX content
693
+ * @returns {string} - Content with properly spaced display math
694
+ */
695
+ function formatDisplayMathBlocks(content) {
696
+ console.log(' 📐 Formatting display math blocks with proper spacing...');
697
+
698
+ let formattedCount = 0;
699
+
700
+ // Find all $$...$$$ blocks (display math) and ensure proper line breaks
701
+ // Very strict: only matches exactly $$ followed by content followed by $$
702
+ const formattedContent = content.replace(/\$\$([\s\S]*?)\$\$/g, (match, mathContent) => {
703
+ formattedCount++;
704
+
705
+ // Clean up the math content - trim whitespace but preserve structure
706
+ const cleanedMath = mathContent.trim();
707
+
708
+ // Return with proper line breaks before and after
709
+ return `\n$$\n${cleanedMath}\n$$\n`;
710
+ });
711
+
712
+ if (formattedCount > 0) {
713
+ console.log(` ✅ Formatted ${formattedCount} display math block(s) with proper spacing`);
714
+ }
715
+
716
+ return formattedContent;
717
+ }
718
+
719
+ /**
720
+ * Clean newlines from figcaption content
721
+ * @param {string} content - MDX content
722
+ * @returns {string} - Content with cleaned figcaptions
723
+ */
724
+ function cleanFigcaptionNewlines(content) {
725
+ console.log(' 📝 Cleaning newlines in figcaption elements...');
726
+
727
+ let cleanedCount = 0;
728
+
729
+ // Find all <figcaption>...</figcaption> blocks and remove internal newlines
730
+ const cleanedContent = content.replace(/<figcaption([^>]*)>([\s\S]*?)<\/figcaption>/g, (match, attributes, captionContent) => {
731
+ // Only process if the content contains newlines
732
+ if (captionContent.includes('\n')) {
733
+ cleanedCount++;
734
+
735
+ // Remove newlines and normalize whitespace
736
+ const cleanedCaption = captionContent
737
+ .replace(/\n+/g, ' ') // Replace newlines with spaces
738
+ .replace(/\s+/g, ' ') // Normalize multiple spaces
739
+ .trim(); // Trim whitespace
740
+
741
+ return `<figcaption${attributes}>${cleanedCaption}</figcaption>`;
742
+ }
743
+
744
+ return match; // Return unchanged if no newlines
745
+ });
746
+
747
+ if (cleanedCount > 0) {
748
+ console.log(` ✅ Cleaned ${cleanedCount} figcaption element(s)`);
749
+ } else {
750
+ console.log(` ℹ️ No figcaption elements with newlines found`);
751
+ }
752
+
753
+ return cleanedContent;
754
+ }
755
+
756
+ /**
757
+ * Remove HTML comments from MDX content
758
+ * @param {string} content - MDX content
759
+ * @returns {string} - Content without HTML comments
760
+ */
761
+ function removeHtmlComments(content) {
762
+ console.log(' 🗑️ Removing HTML comments...');
763
+
764
+ let removedCount = 0;
765
+
766
+ // Remove all HTML comments <!-- ... -->
767
+ const cleanedContent = content.replace(/<!--[\s\S]*?-->/g, () => {
768
+ removedCount++;
769
+ return '';
770
+ });
771
+
772
+ if (removedCount > 0) {
773
+ console.log(` ✅ Removed ${removedCount} HTML comment(s)`);
774
+ }
775
+
776
+ return cleanedContent;
777
+ }
778
+
779
+ /**
780
+ * Clean up MDX-incompatible syntax
781
+ * @param {string} content - MDX content
782
+ * @returns {string} - Cleaned content
783
+ */
784
+ function cleanMdxSyntax(content) {
785
+ console.log(' 🧹 Cleaning MDX syntax...');
786
+
787
+ return content
788
+ // NOTE: Math delimiter fixing is now handled by fixMixedMathDelimiters()
789
+ // Ensure proper spacing around JSX-like constructs
790
+ .replace(/>\s*</g, '>\n<')
791
+ // Remove problematic heading attributes - be more specific to avoid matching \begin{align}
792
+ .replace(/^(#{1,6}\s+[^{#\n]+)\{[^}]+\}$/gm, '$1')
793
+ // Fix escaped quotes in text
794
+ .replace(/\\("|')/g, '$1');
795
+ }
796
+
797
+ /**
798
+ * Main MDX processing function that applies all transformations
799
+ * @param {string} content - Raw Markdown content
800
+ * @param {string} latexContent - Original LaTeX content for metadata extraction
801
+ * @returns {string} - Processed MDX content compatible with Astro
802
+ */
803
+ function processMdxContent(content, latexContent = '') {
804
+ console.log('🔧 Processing for Astro MDX compatibility...');
805
+
806
+ // Clear previous tracking
807
+ usedComponents.clear();
808
+ imageImports.clear();
809
+
810
+ let processedContent = content;
811
+
812
+ // Apply each transformation step sequentially
813
+ processedContent = ensureFrontmatter(processedContent, latexContent);
814
+ processedContent = fixMixedMathDelimiters(processedContent);
815
+
816
+ // Debug: check for $$ blocks after fixMixedMathDelimiters
817
+ const mathBlocksAfterMixed = (processedContent.match(/\$\$([\s\S]*?)\$\$/g) || []).length;
818
+ console.log(` 📊 Math blocks after mixed delimiters fix: ${mathBlocksAfterMixed}`);
819
+
820
+ processedContent = cleanOrphanedMathDelimiters(processedContent);
821
+ processedContent = cleanSingleLineMathNewlines(processedContent);
822
+ processedContent = formatDisplayMathBlocks(processedContent);
823
+ processedContent = removeHtmlComments(processedContent);
824
+ processedContent = cleanMdxSyntax(processedContent);
825
+ processedContent = convertSubfiguresToMultiFigure(processedContent);
826
+ processedContent = transformImages(processedContent);
827
+ processedContent = transformStyledSpans(processedContent);
828
+ processedContent = transformReferenceLinks(processedContent);
829
+ processedContent = fixHtmlEscaping(processedContent);
830
+ processedContent = cleanHighlightNumbering(processedContent);
831
+ processedContent = cleanFigcaptionNewlines(processedContent);
832
+
833
+ // Add component imports at the end
834
+ processedContent = addComponentImports(processedContent);
835
+
836
+ return processedContent;
837
+ }
838
+
839
+ function convertToMdx(inputFile, outputFile) {
840
+ console.log('📝 Modular Markdown to Astro MDX Converter');
841
+ console.log(`📁 Input: ${inputFile}`);
842
+ console.log(`📁 Output: ${outputFile}`);
843
+
844
+ // Check if input file exists
845
+ if (!existsSync(inputFile)) {
846
+ console.error(`❌ Input file not found: ${inputFile}`);
847
+ process.exit(1);
848
+ }
849
+
850
+ try {
851
+ console.log('🔄 Reading Markdown file...');
852
+ const markdownContent = readFileSync(inputFile, 'utf8');
853
+
854
+ // Try to read original LaTeX file for metadata extraction
855
+ let latexContent = '';
856
+ try {
857
+ const inputDir = dirname(inputFile);
858
+ const latexFile = join(inputDir, '..', 'input', 'main.tex');
859
+ if (existsSync(latexFile)) {
860
+ latexContent = readFileSync(latexFile, 'utf8');
861
+ }
862
+ } catch (error) {
863
+ // Ignore LaTeX reading errors - we'll use fallback frontmatter
864
+ }
865
+
866
+ // Apply modular MDX processing
867
+ const mdxContent = processMdxContent(markdownContent, latexContent);
868
+
869
+ console.log('💾 Writing MDX file...');
870
+ writeFileSync(outputFile, mdxContent);
871
+
872
+ console.log(`✅ Conversion completed: ${outputFile}`);
873
+
874
+ // Show file size
875
+ const inputSize = Math.round(markdownContent.length / 1024);
876
+ const outputSize = Math.round(mdxContent.length / 1024);
877
+ console.log(`📊 Input: ${inputSize}KB → Output: ${outputSize}KB`);
878
+
879
+ } catch (error) {
880
+ console.error('❌ Conversion failed:');
881
+ console.error(error.message);
882
+ process.exit(1);
883
+ }
884
+ }
885
+
886
+ export { convertToMdx };
887
+
888
+ function main() {
889
+ const config = parseArgs();
890
+ convertToMdx(config.input, config.output);
891
+ console.log('🎉 MDX conversion completed!');
892
+ }
893
+
894
+ if (import.meta.url === `file://${process.argv[1]}`) {
895
+ main();
896
+ }
app/scripts/latex-importer/metadata-extractor.mjs ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * LaTeX Metadata Extractor
3
+ * Extracts document metadata from LaTeX files for frontmatter generation
4
+ */
5
+
6
+ /**
7
+ * Extract metadata from LaTeX content
8
+ * @param {string} latexContent - Raw LaTeX content
9
+ * @returns {object} - Extracted metadata object
10
+ */
11
+ export function extractLatexMetadata(latexContent) {
12
+ const metadata = {};
13
+
14
+ // Extract title
15
+ const titleMatch = latexContent.match(/\\title\s*\{\s*([^}]+)\s*\}/s);
16
+ if (titleMatch) {
17
+ metadata.title = titleMatch[1]
18
+ .replace(/\n/g, ' ')
19
+ .trim();
20
+ }
21
+
22
+ // Extract authors with their specific affiliations
23
+ const authors = [];
24
+ const authorMatches = latexContent.matchAll(/\\authorOne\[[^\]]*\]\{([^}]+)\}/g);
25
+
26
+ for (const match of authorMatches) {
27
+ const fullAuthorInfo = match[1];
28
+
29
+ // Determine affiliations based on macros present
30
+ const affiliations = [];
31
+ if (fullAuthorInfo.includes('\\ensps')) {
32
+ affiliations.push(1); // École Normale Supérieure
33
+ }
34
+ if (fullAuthorInfo.includes('\\hf')) {
35
+ affiliations.push(2); // Hugging Face
36
+ }
37
+
38
+ // Clean author name by removing macros
39
+ let authorName = fullAuthorInfo
40
+ .replace(/\\ensps/g, '') // Remove École macro
41
+ .replace(/\\hf/g, '') // Remove Hugging Face macro
42
+ .replace(/\s+/g, ' ') // Normalize whitespace
43
+ .trim();
44
+
45
+ // Skip empty authors or placeholder entries
46
+ if (authorName && authorName !== '...') {
47
+ authors.push({
48
+ name: authorName,
49
+ affiliations: affiliations.length > 0 ? affiliations : [2] // Default to HF if no macro
50
+ });
51
+ }
52
+ }
53
+
54
+ if (authors.length > 0) {
55
+ metadata.authors = authors;
56
+ }
57
+
58
+ // Extract affiliations - create the two distinct affiliations
59
+ metadata.affiliations = [
60
+ {
61
+ name: "École Normale Supérieure Paris-Saclay"
62
+ },
63
+ {
64
+ name: "Hugging Face"
65
+ }
66
+ ];
67
+
68
+ // Extract date if available (common LaTeX patterns)
69
+ const datePatterns = [
70
+ /\\date\s*\{([^}]+)\}/,
71
+ /\\newcommand\s*\{\\date\}\s*\{([^}]+)\}/,
72
+ ];
73
+
74
+ for (const pattern of datePatterns) {
75
+ const dateMatch = latexContent.match(pattern);
76
+ if (dateMatch) {
77
+ metadata.published = dateMatch[1].trim();
78
+ break;
79
+ }
80
+ }
81
+
82
+ // Fallback to current date if no date found
83
+ if (!metadata.published) {
84
+ metadata.published = new Date().toLocaleDateString('en-US', {
85
+ year: 'numeric',
86
+ month: 'short',
87
+ day: '2-digit'
88
+ });
89
+ }
90
+
91
+ return metadata;
92
+ }
93
+
94
+ /**
95
+ * Generate YAML frontmatter from metadata object
96
+ * @param {object} metadata - Metadata object
97
+ * @returns {string} - YAML frontmatter string
98
+ */
99
+ export function generateFrontmatter(metadata) {
100
+ let frontmatter = '---\n';
101
+
102
+ // Title
103
+ if (metadata.title) {
104
+ frontmatter += `title: "${metadata.title}"\n`;
105
+ }
106
+
107
+ // Authors
108
+ if (metadata.authors && metadata.authors.length > 0) {
109
+ frontmatter += 'authors:\n';
110
+ metadata.authors.forEach(author => {
111
+ frontmatter += ` - name: "${author.name}"\n`;
112
+ if (author.url) {
113
+ frontmatter += ` url: "${author.url}"\n`;
114
+ }
115
+ frontmatter += ` affiliations: [${author.affiliations.join(', ')}]\n`;
116
+ });
117
+ }
118
+
119
+ // Affiliations
120
+ if (metadata.affiliations && metadata.affiliations.length > 0) {
121
+ frontmatter += 'affiliations:\n';
122
+ metadata.affiliations.forEach((affiliation, index) => {
123
+ frontmatter += ` - name: "${affiliation.name}"\n`;
124
+ if (affiliation.url) {
125
+ frontmatter += ` url: "${affiliation.url}"\n`;
126
+ }
127
+ });
128
+ }
129
+
130
+ // Publication date
131
+ if (metadata.published) {
132
+ frontmatter += `published: "${metadata.published}"\n`;
133
+ }
134
+
135
+ // Additional metadata
136
+ if (metadata.doi) {
137
+ frontmatter += `doi: "${metadata.doi}"\n`;
138
+ }
139
+
140
+ if (metadata.description) {
141
+ frontmatter += `description: "${metadata.description}"\n`;
142
+ }
143
+
144
+ if (metadata.licence) {
145
+ frontmatter += `licence: >\n ${metadata.licence}\n`;
146
+ }
147
+
148
+ if (metadata.tags && metadata.tags.length > 0) {
149
+ frontmatter += 'tags:\n';
150
+ metadata.tags.forEach(tag => {
151
+ frontmatter += ` - ${tag}\n`;
152
+ });
153
+ }
154
+
155
+ // Default Astro configuration
156
+ frontmatter += 'tableOfContentsAutoCollapse: true\n';
157
+ frontmatter += '---\n\n';
158
+
159
+ return frontmatter;
160
+ }
161
+
162
+ /**
163
+ * Extract and generate frontmatter from LaTeX content
164
+ * @param {string} latexContent - Raw LaTeX content
165
+ * @returns {string} - Complete YAML frontmatter
166
+ */
167
+ export function extractAndGenerateFrontmatter(latexContent) {
168
+ const metadata = extractLatexMetadata(latexContent);
169
+ return generateFrontmatter(metadata);
170
+ }
app/scripts/latex-importer/package-lock.json ADDED
Binary file (56.7 kB). View file
 
app/scripts/latex-importer/package.json ADDED
Binary file (967 Bytes). View file
 
app/scripts/latex-importer/post-processor.mjs ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
4
+ import { join, dirname } from 'path';
5
+ import { fileURLToPath } from 'url';
6
+
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = dirname(__filename);
9
+
10
+ /**
11
+ * Post-processor for cleaning Markdown content from LaTeX conversion
12
+ * Each function handles a specific type of cleanup for maintainability
13
+ */
14
+
15
+ /**
16
+ * Remove TeX low-level grouping commands that break KaTeX
17
+ * @param {string} content - Markdown content
18
+ * @returns {string} - Cleaned content
19
+ */
20
+ function removeTexGroupingCommands(content) {
21
+ console.log(' 🧹 Removing TeX grouping commands...');
22
+
23
+ return content
24
+ .replace(/\\mathopen\{\}\\mathclose\\bgroup/g, '')
25
+ .replace(/\\aftergroup\\egroup/g, '')
26
+ .replace(/\\bgroup/g, '')
27
+ .replace(/\\egroup/g, '');
28
+ }
29
+
30
+ /**
31
+ * Simplify LaTeX delimiter constructions
32
+ * @param {string} content - Markdown content
33
+ * @returns {string} - Cleaned content
34
+ */
35
+ function simplifyLatexDelimiters(content) {
36
+ console.log(' 🔧 Simplifying LaTeX delimiters...');
37
+
38
+ return content
39
+ .replace(/\\left\[\s*/g, '[')
40
+ .replace(/\s*\\right\]/g, ']');
41
+ }
42
+
43
+ /**
44
+ * Remove orphaned LaTeX labels
45
+ * @param {string} content - Markdown content
46
+ * @returns {string} - Cleaned content
47
+ */
48
+ function removeOrphanedLabels(content) {
49
+ console.log(' 🏷️ Removing orphaned labels...');
50
+
51
+ return content
52
+ .replace(/^\s*\\label\{[^}]+\}\s*$/gm, '')
53
+ .replace(/\\label\{[^}]+\}/g, '');
54
+ }
55
+
56
+ /**
57
+ * Fix KaTeX-incompatible math commands
58
+ * @param {string} content - Markdown content
59
+ * @returns {string} - Cleaned content
60
+ */
61
+ function fixMathCommands(content) {
62
+ console.log(' 📐 Fixing KaTeX-incompatible math commands...');
63
+
64
+ return content
65
+ // Replace \hdots with \ldots (KaTeX compatible)
66
+ .replace(/\\hdots/g, '\\ldots')
67
+ // Add more math command fixes here as needed
68
+ .replace(/\\vdots/g, '\\vdots'); // This one should be fine, but kept for consistency
69
+ }
70
+
71
+ /**
72
+ * Convert LaTeX matrix commands to KaTeX-compatible environments
73
+ * @param {string} content - Markdown content
74
+ * @returns {string} - Content with fixed matrix commands
75
+ */
76
+ function fixMatrixCommands(content) {
77
+ console.log(' 🔢 Converting matrix commands to KaTeX format...');
78
+
79
+ let fixedCount = 0;
80
+
81
+ // Convert \pmatrix{...} to \begin{pmatrix}...\end{pmatrix}
82
+ content = content.replace(/\\pmatrix\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, matrixContent) => {
83
+ fixedCount++;
84
+ // Split by \\ for rows, handle nested braces
85
+ const rows = matrixContent.split('\\\\').map(row => row.trim()).filter(row => row);
86
+ return `\\begin{pmatrix}\n${rows.join(' \\\\\n')}\n\\end{pmatrix}`;
87
+ });
88
+
89
+ // Convert \bmatrix{...} to \begin{bmatrix}...\end{bmatrix}
90
+ content = content.replace(/\\bmatrix\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, matrixContent) => {
91
+ fixedCount++;
92
+ const rows = matrixContent.split('\\\\').map(row => row.trim()).filter(row => row);
93
+ return `\\begin{bmatrix}\n${rows.join(' \\\\\n')}\n\\end{bmatrix}`;
94
+ });
95
+
96
+ // Convert \vmatrix{...} to \begin{vmatrix}...\end{vmatrix}
97
+ content = content.replace(/\\vmatrix\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, matrixContent) => {
98
+ fixedCount++;
99
+ const rows = matrixContent.split('\\\\').map(row => row.trim()).filter(row => row);
100
+ return `\\begin{vmatrix}\n${rows.join(' \\\\\n')}\n\\end{vmatrix}`;
101
+ });
102
+
103
+ if (fixedCount > 0) {
104
+ console.log(` ✅ Fixed ${fixedCount} matrix command(s)`);
105
+ }
106
+
107
+ return content;
108
+ }
109
+
110
+ /**
111
+ * Fix Unicode characters that break MDX/JSX parsing
112
+ * @param {string} content - Markdown content
113
+ * @returns {string} - Cleaned content
114
+ */
115
+ function fixUnicodeIssues(content) {
116
+ console.log(' 🌐 Fixing Unicode characters for MDX compatibility...');
117
+
118
+ return content
119
+ // Replace Unicode middle dot (·) with \cdot in math expressions
120
+ .replace(/\$([^$]*?)·([^$]*?)\$/g, (match, before, after) => {
121
+ return `$${before}\\cdot${after}$`;
122
+ })
123
+ // Replace Unicode middle dot in display math
124
+ .replace(/\$\$([^$]*?)·([^$]*?)\$\$/g, (match, before, after) => {
125
+ return `$$${before}\\cdot${after}$$`;
126
+ })
127
+ // Replace other problematic Unicode characters
128
+ .replace(/[""]/g, '"') // Smart quotes to regular quotes
129
+ .replace(/['']/g, "'") // Smart apostrophes to regular apostrophes
130
+ .replace(/…/g, '...') // Ellipsis to three dots
131
+ .replace(/–/g, '-') // En dash to hyphen
132
+ .replace(/—/g, '--'); // Em dash to double hyphen
133
+ }
134
+
135
+ /**
136
+ * Fix multiline math expressions for MDX compatibility
137
+ * @param {string} content - Markdown content
138
+ * @returns {string} - Cleaned content
139
+ */
140
+ function fixMultilineMath(content) {
141
+ console.log(' 📏 Fixing multiline math expressions for MDX...');
142
+
143
+ return content
144
+ // Convert multiline inline math to display math blocks (more precise regex)
145
+ // Only match if the content is a self-contained math expression within a single line
146
+ .replace(/\$([^$\n]*\\\\[^$\n]*)\$/g, (match, mathContent) => {
147
+ // Only convert if it contains actual math operators and line breaks
148
+ if (mathContent.includes('\\\\') && /[=+\-*/^_{}]/.test(mathContent)) {
149
+ // Remove leading/trailing whitespace and normalize newlines
150
+ const cleanedMath = mathContent
151
+ .replace(/^\s+|\s+$/g, '')
152
+ .replace(/\s*\\\\\s*/g, '\\\\\n ');
153
+ return `$$\n${cleanedMath}\n$$`;
154
+ }
155
+ return match; // Keep original if it doesn't look like multiline math
156
+ })
157
+ // Ensure display math blocks are properly separated
158
+ .replace(/\$\$\s*\n\s*([^$]+?)\s*\n\s*\$\$/g, (match, mathContent) => {
159
+ return `\n$$\n${mathContent.trim()}\n$$\n`;
160
+ });
161
+ }
162
+
163
+ /**
164
+ * Inject code snippets into empty code blocks
165
+ * @param {string} content - Markdown content
166
+ * @param {string} inputDir - Directory containing the LaTeX source and snippets
167
+ * @returns {string} - Content with injected code snippets
168
+ */
169
+ function injectCodeSnippets(content, inputDir = null) {
170
+ console.log(' 💻 Injecting code snippets...');
171
+
172
+ if (!inputDir) {
173
+ console.log(' ⚠️ No input directory provided, skipping code injection');
174
+ return content;
175
+ }
176
+
177
+ const snippetsDir = join(inputDir, 'snippets');
178
+
179
+ if (!existsSync(snippetsDir)) {
180
+ console.log(' ⚠️ Snippets directory not found, skipping code injection');
181
+ return content;
182
+ }
183
+
184
+ // Get all available snippet files
185
+ let availableSnippets = [];
186
+ try {
187
+ availableSnippets = readdirSync(snippetsDir);
188
+ console.log(` 📁 Found ${availableSnippets.length} snippet file(s): ${availableSnippets.join(', ')}`);
189
+ } catch (error) {
190
+ console.log(` ❌ Error reading snippets directory: ${error.message}`);
191
+ return content;
192
+ }
193
+
194
+ // Find all empty code blocks
195
+ const emptyCodeBlockPattern = /```\s*(\w+)\s*\n\s*```/g;
196
+
197
+ let processedContent = content;
198
+ let injectionCount = 0;
199
+
200
+ processedContent = processedContent.replace(emptyCodeBlockPattern, (match, language) => {
201
+ // Map language names to file extensions
202
+ const extensionMap = {
203
+ 'python': 'py',
204
+ 'javascript': 'js',
205
+ 'typescript': 'ts',
206
+ 'bash': 'sh',
207
+ 'shell': 'sh'
208
+ };
209
+
210
+ const fileExtension = extensionMap[language] || language;
211
+
212
+ // Try to find a matching snippet file for this language
213
+ const matchingFiles = availableSnippets.filter(file =>
214
+ file.endsWith(`.${fileExtension}`)
215
+ );
216
+
217
+ if (matchingFiles.length === 0) {
218
+ console.log(` ⚠️ No ${language} snippet found (looking for .${fileExtension})`);
219
+ return match;
220
+ }
221
+
222
+ // Use the first matching file (could be made smarter with context analysis)
223
+ const selectedFile = matchingFiles[0];
224
+ const snippetPath = join(snippetsDir, selectedFile);
225
+
226
+ try {
227
+ const snippetContent = readFileSync(snippetPath, 'utf8');
228
+ injectionCount++;
229
+ console.log(` ✅ Injected: ${selectedFile}`);
230
+ return `\`\`\`${language}\n${snippetContent.trim()}\n\`\`\``;
231
+ } catch (error) {
232
+ console.log(` ❌ Error reading ${selectedFile}: ${error.message}`);
233
+ return match;
234
+ }
235
+ });
236
+
237
+ if (injectionCount > 0) {
238
+ console.log(` 📊 Injected ${injectionCount} code snippet(s)`);
239
+ }
240
+
241
+ return processedContent;
242
+ }
243
+
244
+ /**
245
+ * Fix all attributes that still contain colons (href, data-reference, id)
246
+ * @param {string} content - Markdown content
247
+ * @returns {string} - Cleaned content
248
+ */
249
+ function fixAllAttributes(content) {
250
+ console.log(' 🔗 Fixing all attributes with colons...');
251
+
252
+ let fixedCount = 0;
253
+
254
+ // Fix href attributes containing colons
255
+ content = content.replace(/href="([^"]*):([^"]*)"/g, (match, before, after) => {
256
+ fixedCount++;
257
+ return `href="${before}-${after}"`;
258
+ });
259
+
260
+ // Fix data-reference attributes containing colons
261
+ content = content.replace(/data-reference="([^"]*):([^"]*)"/g, (match, before, after) => {
262
+ fixedCount++;
263
+ return `data-reference="${before}-${after}"`;
264
+ });
265
+
266
+ // Fix id attributes containing colons (like in Figure components)
267
+ content = content.replace(/id="([^"]*):([^"]*)"/g, (match, before, after) => {
268
+ fixedCount++;
269
+ return `id="${before}-${after}"`;
270
+ });
271
+
272
+ if (fixedCount > 0) {
273
+ console.log(` ✅ Fixed ${fixedCount} attribute(s) with colons`);
274
+ }
275
+
276
+ return content;
277
+ }
278
+
279
+ /**
280
+ * Fix link text content that still contains colons
281
+ * @param {string} content - Markdown content
282
+ * @returns {string} - Cleaned content
283
+ */
284
+ function fixLinkTextContent(content) {
285
+ console.log(' 📝 Fixing link text content with colons...');
286
+
287
+ let fixedCount = 0;
288
+
289
+ // Fix text content within links that contain references with colons
290
+ // Pattern: <a ...>[text:content]</a>
291
+ const cleanedContent = content.replace(/<a([^>]*)>\[([^:]*):([^\]]*)\]<\/a>/g, (match, attributes, before, after) => {
292
+ fixedCount++;
293
+ return `<a${attributes}>[${before}-${after}]</a>`;
294
+ });
295
+
296
+ if (fixedCount > 0) {
297
+ console.log(` ✅ Fixed ${fixedCount} link text(s) with colons`);
298
+ }
299
+
300
+ return cleanedContent;
301
+ }
302
+
303
+ /**
304
+ * Convert align anchor markers to proper HTML spans outside math blocks
305
+ * @param {string} content - Markdown content
306
+ * @returns {string} - Content with converted anchor spans
307
+ */
308
+ function convertAlignAnchors(content) {
309
+ console.log(' 🏷️ Converting align anchor markers to HTML spans...');
310
+
311
+ let convertedCount = 0;
312
+
313
+ // Find and replace align anchor markers with proper spans outside math blocks
314
+ content = content.replace(/``` math\n%%ALIGN_ANCHOR_ID\{([^}]+)\}%%\n([\s\S]*?)\n```/g, (match, anchorId, mathContent) => {
315
+ convertedCount++;
316
+ return `<span id="${anchorId}" style="position: absolute;"></span>\n\n\`\`\` math\n${mathContent}\n\`\`\``;
317
+ });
318
+
319
+ if (convertedCount > 0) {
320
+ console.log(` ✅ Converted ${convertedCount} align anchor marker(s) to spans`);
321
+ }
322
+
323
+ return content;
324
+ }
325
+
326
+ /**
327
+ * Main post-processing function that applies all cleanup steps
328
+ * @param {string} content - Raw Markdown content from Pandoc
329
+ * @param {string} inputDir - Optional: Directory containing LaTeX source for code injection
330
+ * @returns {string} - Cleaned Markdown content
331
+ */
332
+ export function postProcessMarkdown(content, inputDir = null) {
333
+ console.log('🔧 Post-processing for KaTeX compatibility...');
334
+
335
+ let processedContent = content;
336
+
337
+ // Apply each cleanup step sequentially
338
+ processedContent = removeTexGroupingCommands(processedContent);
339
+ processedContent = simplifyLatexDelimiters(processedContent);
340
+ processedContent = removeOrphanedLabels(processedContent);
341
+ processedContent = convertAlignAnchors(processedContent);
342
+ processedContent = fixMathCommands(processedContent);
343
+ processedContent = fixMatrixCommands(processedContent);
344
+ processedContent = fixUnicodeIssues(processedContent);
345
+ processedContent = fixMultilineMath(processedContent);
346
+ processedContent = fixAllAttributes(processedContent);
347
+ processedContent = fixLinkTextContent(processedContent);
348
+
349
+ // Inject code snippets if input directory is provided
350
+ if (inputDir) {
351
+ processedContent = injectCodeSnippets(processedContent, inputDir);
352
+ }
353
+
354
+ return processedContent;
355
+ }
356
+
357
+ /**
358
+ * CLI interface for standalone usage
359
+ */
360
+ function parseArgs() {
361
+ const args = process.argv.slice(2);
362
+ const config = {
363
+ input: join(__dirname, 'output', 'main.md'),
364
+ output: null, // Will default to input if not specified
365
+ verbose: false,
366
+ };
367
+
368
+ for (const arg of args) {
369
+ if (arg.startsWith('--input=')) {
370
+ config.input = arg.substring('--input='.length);
371
+ } else if (arg.startsWith('--output=')) {
372
+ config.output = arg.substring('--output='.length);
373
+ } else if (arg === '--verbose') {
374
+ config.verbose = true;
375
+ } else if (arg === '--help' || arg === '-h') {
376
+ console.log(`
377
+ 🔧 Markdown Post-Processor
378
+
379
+ Usage:
380
+ node post-processor.mjs [options]
381
+
382
+ Options:
383
+ --input=PATH Input Markdown file (default: output/main.md)
384
+ --output=PATH Output file (default: overwrites input)
385
+ --verbose Verbose output
386
+ --help, -h Show this help
387
+
388
+ Examples:
389
+ # Process main.md in-place
390
+ node post-processor.mjs
391
+
392
+ # Process with custom paths
393
+ node post-processor.mjs --input=raw.md --output=clean.md
394
+ `);
395
+ process.exit(0);
396
+ }
397
+ }
398
+
399
+ // Default output to input if not specified
400
+ if (!config.output) {
401
+ config.output = config.input;
402
+ }
403
+
404
+ return config;
405
+ }
406
+
407
+ function main() {
408
+ const config = parseArgs();
409
+
410
+ console.log('🔧 Markdown Post-Processor');
411
+ console.log(`📁 Input: ${config.input}`);
412
+ console.log(`📁 Output: ${config.output}`);
413
+
414
+ try {
415
+ const content = readFileSync(config.input, 'utf8');
416
+ const processedContent = postProcessMarkdown(content);
417
+
418
+ writeFileSync(config.output, processedContent);
419
+
420
+ console.log(`✅ Post-processing completed: ${config.output}`);
421
+
422
+ // Show stats if verbose
423
+ if (config.verbose) {
424
+ const originalLines = content.split('\n').length;
425
+ const processedLines = processedContent.split('\n').length;
426
+ console.log(`📊 Lines: ${originalLines} → ${processedLines}`);
427
+ }
428
+
429
+ } catch (error) {
430
+ console.error('❌ Post-processing failed:');
431
+ console.error(error.message);
432
+ process.exit(1);
433
+ }
434
+ }
435
+
436
+ // Run CLI if called directly
437
+ if (import.meta.url === `file://${process.argv[1]}`) {
438
+ main();
439
+ }
app/scripts/latex-importer/reference-preprocessor.mjs ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * LaTeX Reference Preprocessor
5
+ *
6
+ * This module cleans up LaTeX references BEFORE Pandoc conversion to ensure
7
+ * consistent, MDX-compatible identifiers throughout the document.
8
+ *
9
+ * What it does:
10
+ * - Removes prefixes from labels: \label{sec:intro} → \label{sec-intro}
11
+ * - Updates corresponding refs: \ref{sec:intro} → \ref{sec-intro}
12
+ * - Handles all reference types: sec:, fig:, eq:, table:, etc.
13
+ * - Maintains consistency between labels and references
14
+ */
15
+
16
+ /**
17
+ * Extract all references from LaTeX content
18
+ * @param {string} content - LaTeX content
19
+ * @returns {Object} - Object with labels and refs arrays
20
+ */
21
+ function extractReferences(content) {
22
+ const references = {
23
+ labels: new Set(),
24
+ refs: new Set(),
25
+ cites: new Set()
26
+ };
27
+
28
+ // Find all \label{...} commands
29
+ const labelMatches = content.matchAll(/\\label\{([^}]+)\}/g);
30
+ for (const match of labelMatches) {
31
+ references.labels.add(match[1]);
32
+ }
33
+
34
+ // Find all \ref{...} commands
35
+ const refMatches = content.matchAll(/\\ref\{([^}]+)\}/g);
36
+ for (const match of refMatches) {
37
+ references.refs.add(match[1]);
38
+ }
39
+
40
+ // Find all \cite{...} commands (already handled in existing code but included for completeness)
41
+ const citeMatches = content.matchAll(/\\cite[tp]?\{([^}]+)\}/g);
42
+ for (const match of citeMatches) {
43
+ // Handle multiple citations: \cite{ref1,ref2,ref3}
44
+ const citations = match[1].split(',').map(cite => cite.trim());
45
+ citations.forEach(cite => references.cites.add(cite));
46
+ }
47
+
48
+ return references;
49
+ }
50
+
51
+ /**
52
+ * Create clean identifier mapping
53
+ * @param {Object} references - References object from extractReferences
54
+ * @returns {Map} - Mapping from original to clean identifiers
55
+ */
56
+ function createCleanMapping(references) {
57
+ const mapping = new Map();
58
+
59
+ // Create mapping for all unique identifiers
60
+ const allIdentifiers = new Set([
61
+ ...references.labels,
62
+ ...references.refs
63
+ ]);
64
+
65
+ for (const id of allIdentifiers) {
66
+ // Remove common prefixes and replace colons with dashes
67
+ let cleanId = id
68
+ .replace(/^(sec|section|ch|chapter|fig|figure|eq|equation|tab|table|lst|listing|app|appendix):/gi, '')
69
+ .replace(/:/g, '-')
70
+ .replace(/[^a-zA-Z0-9_-]/g, '-') // Replace any other problematic characters
71
+ .replace(/-+/g, '-') // Collapse multiple dashes
72
+ .replace(/^-|-$/g, ''); // Remove leading/trailing dashes
73
+
74
+ // Ensure we don't have empty identifiers
75
+ if (!cleanId) {
76
+ cleanId = id.replace(/:/g, '-');
77
+ }
78
+
79
+ mapping.set(id, cleanId);
80
+ }
81
+
82
+ return mapping;
83
+ }
84
+
85
+ /**
86
+ * Convert labels to HTML anchor spans for better MDX compatibility
87
+ * @param {string} content - LaTeX content
88
+ * @param {Map} mapping - Identifier mapping (original -> clean)
89
+ * @returns {Object} - Result with content and count of conversions
90
+ */
91
+ function convertLabelsToAnchors(content, mapping) {
92
+ let processedContent = content;
93
+ let anchorsCreated = 0;
94
+
95
+ // Replace \label{...} with HTML anchor spans, but SKIP labels inside math environments
96
+ for (const [original, clean] of mapping) {
97
+ // Skip equation labels (they will be handled by the Lua filter)
98
+ if (original.startsWith('eq:')) {
99
+ continue;
100
+ }
101
+
102
+ const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
103
+ const labelMatches = processedContent.match(labelRegex);
104
+
105
+ if (labelMatches) {
106
+ // Replace \label{original} with HTML span anchor (invisible but accessible)
107
+ processedContent = processedContent.replace(labelRegex, `\n\n<span id="${clean}" style="position: absolute;"></span>\n\n`);
108
+ anchorsCreated += labelMatches.length;
109
+ }
110
+ }
111
+
112
+ return { content: processedContent, anchorsCreated };
113
+ }
114
+
115
+ /**
116
+ * Convert \highlight{...} commands to HTML spans with CSS class
117
+ * @param {string} content - LaTeX content
118
+ * @returns {Object} - Result with content and count of conversions
119
+ */
120
+ function convertHighlightCommands(content) {
121
+ let processedContent = content;
122
+ let highlightsConverted = 0;
123
+
124
+ // Replace \highlight{...} with <span class="highlight">...</span>
125
+ processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
126
+ highlightsConverted++;
127
+ return `<span class="highlight">${text}</span>`;
128
+ });
129
+
130
+ return { content: processedContent, highlightsConverted };
131
+ }
132
+
133
+ /**
134
+ * Apply mapping to LaTeX content
135
+ * @param {string} content - Original LaTeX content
136
+ * @param {Map} mapping - Identifier mapping
137
+ * @returns {string} - Cleaned LaTeX content
138
+ */
139
+ function applyMapping(content, mapping) {
140
+ let cleanedContent = content;
141
+ let changesCount = 0;
142
+
143
+ // First, convert labels to anchor spans
144
+ const anchorResult = convertLabelsToAnchors(cleanedContent, mapping);
145
+ cleanedContent = anchorResult.content;
146
+ const anchorsCreated = anchorResult.anchorsCreated;
147
+
148
+ // Convert \highlight{} commands to spans
149
+ const highlightResult = convertHighlightCommands(cleanedContent);
150
+ cleanedContent = highlightResult.content;
151
+ const highlightsConverted = highlightResult.highlightsConverted;
152
+
153
+ // Then apply mapping to remaining references and equation labels
154
+ for (const [original, clean] of mapping) {
155
+ if (original !== clean) {
156
+ // Replace \ref{original} with \ref{clean}
157
+ const refRegex = new RegExp(`\\\\ref\\{${escapeRegex(original)}\\}`, 'g');
158
+ const refMatches = cleanedContent.match(refRegex);
159
+ if (refMatches) {
160
+ cleanedContent = cleanedContent.replace(refRegex, `\\ref{${clean}}`);
161
+ changesCount += refMatches.length;
162
+ }
163
+
164
+ // For equation labels, still clean the labels themselves (for the Lua filter)
165
+ if (original.startsWith('eq:')) {
166
+ const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
167
+ const labelMatches = cleanedContent.match(labelRegex);
168
+ if (labelMatches) {
169
+ cleanedContent = cleanedContent.replace(labelRegex, `\\label{${clean}}`);
170
+ changesCount += labelMatches.length;
171
+ }
172
+ }
173
+ }
174
+ }
175
+
176
+ return {
177
+ content: cleanedContent,
178
+ changesCount: changesCount + anchorsCreated,
179
+ highlightsConverted: highlightsConverted
180
+ };
181
+ }
182
+
183
+ /**
184
+ * Escape special regex characters
185
+ * @param {string} string - String to escape
186
+ * @returns {string} - Escaped string
187
+ */
188
+ function escapeRegex(string) {
189
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
190
+ }
191
+
192
+ /**
193
+ * Main preprocessing function
194
+ * @param {string} latexContent - Original LaTeX content
195
+ * @returns {Object} - Result with cleaned content and statistics
196
+ */
197
+ export function preprocessLatexReferences(latexContent) {
198
+ console.log('🔧 Preprocessing LaTeX references for MDX compatibility...');
199
+
200
+ // 1. Extract all references
201
+ const references = extractReferences(latexContent);
202
+
203
+ console.log(` 📊 Found: ${references.labels.size} labels, ${references.refs.size} refs`);
204
+
205
+ // 2. Create clean mapping
206
+ const mapping = createCleanMapping(references);
207
+
208
+ // 3. Apply mapping
209
+ const result = applyMapping(latexContent, mapping);
210
+
211
+ if (result.changesCount > 0) {
212
+ console.log(` ✅ Processed ${result.changesCount} reference(s) and created anchor spans`);
213
+
214
+ // Show some examples of changes
215
+ let exampleCount = 0;
216
+ for (const [original, clean] of mapping) {
217
+ if (original !== clean && exampleCount < 3) {
218
+ console.log(` ${original} → ${clean} (span + refs)`);
219
+ exampleCount++;
220
+ }
221
+ }
222
+ if (mapping.size > 3) {
223
+ console.log(` ... and ${mapping.size - 3} more anchor spans created`);
224
+ }
225
+ } else {
226
+ console.log(' ℹ️ No reference cleanup needed');
227
+ }
228
+
229
+ if (result.highlightsConverted > 0) {
230
+ console.log(` ✨ Converted ${result.highlightsConverted} \\highlight{} command(s) to <span class="highlight">`);
231
+ }
232
+
233
+ return {
234
+ content: result.content,
235
+ changesCount: result.changesCount,
236
+ mapping: mapping,
237
+ references: references
238
+ };
239
+ }
app/scripts/notion-importer/.cursorignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app/scripts/notion-importer/.notion-to-md/media/27877f1c-9c9d-804d-9c82-f7b3905578ff_media.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bd4ab2fce404409575347c4d1941d7934aa5022407e2c91098bb2c31a0faed9
3
+ size 36783
app/scripts/notion-importer/custom-code-renderer.mjs ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Custom Code Block Renderer for notion-to-md
5
+ * Fixes the issue where code blocks end with "text" instead of proper closing
6
+ */
7
+
8
+ export function createCustomCodeRenderer() {
9
+ return {
10
+ name: 'custom-code-renderer',
11
+ type: 'renderer',
12
+
13
+ /**
14
+ * Custom renderer for code blocks
15
+ * @param {Object} block - Notion code block
16
+ * @returns {string} - Properly formatted markdown code block
17
+ */
18
+ code: (block) => {
19
+ const { language, rich_text } = block.code;
20
+
21
+ // Extract the actual code content from rich_text
22
+ const codeContent = rich_text
23
+ .map(text => text.plain_text)
24
+ .join('');
25
+
26
+ // Determine the language (default to empty string if not specified)
27
+ const lang = language || '';
28
+
29
+ // Return properly formatted markdown code block
30
+ return `\`\`\`${lang}\n${codeContent}\n\`\`\``;
31
+ }
32
+ };
33
+ }
app/scripts/notion-importer/debug-properties.mjs ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { config } from 'dotenv';
4
+ import { Client } from '@notionhq/client';
5
+
6
+ // Load environment variables from .env file
7
+ config();
8
+
9
+ const notion = new Client({
10
+ auth: process.env.NOTION_TOKEN,
11
+ });
12
+
13
+ async function debugPageProperties() {
14
+ const pageId = '27877f1c9c9d804d9c82f7b3905578ff';
15
+
16
+ try {
17
+ console.log('🔍 Debugging page properties...');
18
+ console.log(`📄 Page ID: ${pageId}`);
19
+
20
+ const page = await notion.pages.retrieve({ page_id: pageId });
21
+
22
+ console.log('\n📋 Available properties:');
23
+ console.log('========================');
24
+
25
+ for (const [key, value] of Object.entries(page.properties)) {
26
+ console.log(`\n🔹 ${key}:`);
27
+ console.log(` Type: ${value.type}`);
28
+
29
+ switch (value.type) {
30
+ case 'title':
31
+ console.log(` Value: "${value.title.map(t => t.plain_text).join('')}"`);
32
+ break;
33
+ case 'rich_text':
34
+ console.log(` Value: "${value.rich_text.map(t => t.plain_text).join('')}"`);
35
+ break;
36
+ case 'people':
37
+ console.log(` People: ${value.people.map(p => p.name || p.id).join(', ')}`);
38
+ break;
39
+ case 'select':
40
+ console.log(` Value: ${value.select?.name || 'null'}`);
41
+ break;
42
+ case 'multi_select':
43
+ console.log(` Values: [${value.multi_select.map(s => s.name).join(', ')}]`);
44
+ break;
45
+ case 'date':
46
+ console.log(` Value: ${value.date?.start || 'null'}`);
47
+ break;
48
+ case 'checkbox':
49
+ console.log(` Value: ${value.checkbox}`);
50
+ break;
51
+ case 'url':
52
+ console.log(` Value: ${value.url || 'null'}`);
53
+ break;
54
+ case 'email':
55
+ console.log(` Value: ${value.email || 'null'}`);
56
+ break;
57
+ case 'phone_number':
58
+ console.log(` Value: ${value.phone_number || 'null'}`);
59
+ break;
60
+ case 'number':
61
+ console.log(` Value: ${value.number || 'null'}`);
62
+ break;
63
+ case 'created_time':
64
+ console.log(` Value: ${value.created_time}`);
65
+ break;
66
+ case 'created_by':
67
+ console.log(` Value: ${value.created_by?.id || 'null'}`);
68
+ break;
69
+ case 'last_edited_time':
70
+ console.log(` Value: ${value.last_edited_time}`);
71
+ break;
72
+ case 'last_edited_by':
73
+ console.log(` Value: ${value.last_edited_by?.id || 'null'}`);
74
+ break;
75
+ default:
76
+ console.log(` Value: ${JSON.stringify(value, null, 2)}`);
77
+ }
78
+ }
79
+
80
+ console.log('\n✅ Properties debug completed!');
81
+
82
+ } catch (error) {
83
+ console.error('❌ Error:', error.message);
84
+ }
85
+ }
86
+
87
+ debugPageProperties();
app/scripts/notion-importer/input/pages.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d51fba4ce9b05562f5df611a150e3cd702b487d2e608441318336556e0f248a
3
+ size 188
app/scripts/notion-importer/mdx-converter.mjs ADDED
@@ -0,0 +1,551 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, statSync } from 'fs';
4
+ import { join, dirname, basename, extname } from 'path';
5
+ import { fileURLToPath } from 'url';
6
+ import matter from 'gray-matter';
7
+ import { extractAndGenerateNotionFrontmatter } from './notion-metadata-extractor.mjs';
8
+
9
+ const __filename = fileURLToPath(import.meta.url);
10
+ const __dirname = dirname(__filename);
11
+
12
+ // Configuration
13
+ const DEFAULT_INPUT = join(__dirname, 'output');
14
+ const DEFAULT_OUTPUT = join(__dirname, 'output');
15
+
16
+ function parseArgs() {
17
+ const args = process.argv.slice(2);
18
+ const config = {
19
+ input: DEFAULT_INPUT,
20
+ output: DEFAULT_OUTPUT,
21
+ };
22
+
23
+ for (const arg of args) {
24
+ if (arg.startsWith('--input=')) {
25
+ config.input = arg.substring('--input='.length);
26
+ } else if (arg.startsWith('--output=')) {
27
+ config.output = arg.substring('--output='.length);
28
+ } else if (arg === '--help' || arg === '-h') {
29
+ console.log(`
30
+ 📝 Notion Markdown to MDX Converter
31
+
32
+ Usage:
33
+ node mdx-converter.mjs [options]
34
+
35
+ Options:
36
+ --input=PATH Input directory or file (default: ${DEFAULT_INPUT})
37
+ --output=PATH Output directory (default: ${DEFAULT_OUTPUT})
38
+ --help, -h Show this help
39
+
40
+ Examples:
41
+ # Convert all markdown files in output directory
42
+ node mdx-converter.mjs
43
+
44
+ # Convert specific file
45
+ node mdx-converter.mjs --input=article.md --output=converted/
46
+
47
+ # Convert directory
48
+ node mdx-converter.mjs --input=markdown-files/ --output=mdx-files/
49
+ `);
50
+ process.exit(0);
51
+ } else if (!config.input) {
52
+ config.input = arg;
53
+ } else if (!config.output) {
54
+ config.output = arg;
55
+ }
56
+ }
57
+ return config;
58
+ }
59
+
60
+ /**
61
+ * Track which Astro components are used during transformations
62
+ */
63
+ const usedComponents = new Set();
64
+
65
+ /**
66
+ * Track individual image imports needed
67
+ */
68
+ const imageImports = new Map(); // src -> varName
69
+
70
+ /**
71
+ * Generate a variable name from image path
72
+ * @param {string} src - Image source path
73
+ * @returns {string} - Valid variable name
74
+ */
75
+ function generateImageVarName(src) {
76
+ // Extract filename without extension and make it a valid JS variable
77
+ const filename = src.split('/').pop().replace(/\.[^.]+$/, '');
78
+ return filename.replace(/[^a-zA-Z0-9]/g, '_').replace(/^[0-9]/, 'img_$&');
79
+ }
80
+
81
+ /**
82
+ * Add required component imports to the frontmatter
83
+ * @param {string} content - MDX content
84
+ * @returns {string} - Content with component imports
85
+ */
86
+ function addComponentImports(content) {
87
+ console.log(' 📦 Adding component and image imports...');
88
+
89
+ let imports = [];
90
+
91
+ // Add component imports
92
+ if (usedComponents.size > 0) {
93
+ const componentImports = Array.from(usedComponents)
94
+ .map(component => `import ${component} from '../components/${component}.astro';`);
95
+ imports.push(...componentImports);
96
+ console.log(` ✅ Importing components: ${Array.from(usedComponents).join(', ')}`);
97
+ }
98
+
99
+ // Add image imports
100
+ if (imageImports.size > 0) {
101
+ const imageImportStatements = Array.from(imageImports.entries())
102
+ .map(([src, varName]) => `import ${varName} from '${src}';`);
103
+ imports.push(...imageImportStatements);
104
+ console.log(` ✅ Importing ${imageImports.size} image(s)`);
105
+ }
106
+
107
+ if (imports.length === 0) {
108
+ console.log(' ℹ️ No imports needed');
109
+ return content;
110
+ }
111
+
112
+ const importBlock = imports.join('\n');
113
+
114
+ // Insert imports after frontmatter
115
+ const frontmatterEnd = content.indexOf('---', 3) + 3;
116
+ if (frontmatterEnd > 2) {
117
+ return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n' + content.slice(frontmatterEnd);
118
+ } else {
119
+ // No frontmatter, add at beginning
120
+ return importBlock + '\n\n' + content;
121
+ }
122
+ }
123
+
124
+ /**
125
+ * Transform Notion images to Figure components
126
+ * @param {string} content - MDX content
127
+ * @returns {string} - Content with Figure components
128
+ */
129
+ function transformImages(content) {
130
+ console.log(' 🖼️ Transforming images to Figure components...');
131
+
132
+ let hasImages = false;
133
+
134
+ // Helper function to clean source paths
135
+ const cleanSrcPath = (src) => {
136
+ // Convert Notion media paths to relative paths
137
+ return src.replace(/^\/media\//, './media/')
138
+ .replace(/^\.\/media\//, './media/');
139
+ };
140
+
141
+ // Helper to clean caption text
142
+ const cleanCaption = (caption) => {
143
+ return caption
144
+ .replace(/<[^>]*>/g, '') // Remove HTML tags
145
+ .replace(/\n/g, ' ') // Replace newlines with spaces
146
+ .replace(/\r/g, ' ') // Replace carriage returns with spaces
147
+ .replace(/\s+/g, ' ') // Replace multiple spaces with single space
148
+ .replace(/'/g, "\\'") // Escape quotes
149
+ .trim(); // Trim whitespace
150
+ };
151
+
152
+ // Helper to clean alt text
153
+ const cleanAltText = (alt, maxLength = 100) => {
154
+ const cleaned = alt
155
+ .replace(/<[^>]*>/g, '') // Remove HTML tags
156
+ .replace(/\n/g, ' ') // Replace newlines with spaces
157
+ .replace(/\r/g, ' ') // Replace carriage returns with spaces
158
+ .replace(/\s+/g, ' ') // Replace multiple spaces with single space
159
+ .trim(); // Trim whitespace
160
+
161
+ return cleaned.length > maxLength
162
+ ? cleaned.substring(0, maxLength) + '...'
163
+ : cleaned;
164
+ };
165
+
166
+ // Create Figure component with import
167
+ const createFigureComponent = (src, alt = '', caption = '') => {
168
+ const cleanSrc = cleanSrcPath(src);
169
+
170
+ // Skip PDF URLs and external URLs - they should remain as links only
171
+ if (cleanSrc.includes('.pdf') || cleanSrc.includes('arxiv.org/pdf') ||
172
+ (cleanSrc.startsWith('http') && !cleanSrc.includes('/media/'))) {
173
+ console.log(` ⚠️ Skipping external/PDF URL: ${cleanSrc}`);
174
+ // Return the original markdown image syntax for external URLs
175
+ return `![${alt}](${src})`;
176
+ }
177
+
178
+ const varName = generateImageVarName(cleanSrc);
179
+ imageImports.set(cleanSrc, varName);
180
+ usedComponents.add('Figure');
181
+
182
+ const props = [];
183
+ props.push(`src={${varName}}`);
184
+ props.push('zoomable');
185
+ props.push('downloadable');
186
+ props.push('layout="fixed"');
187
+ if (alt) props.push(`alt="${alt}"`);
188
+ if (caption) props.push(`caption={'${caption}'}`);
189
+
190
+ return `<Figure\n ${props.join('\n ')}\n/>`;
191
+ };
192
+
193
+ // Transform markdown images: ![alt](src)
194
+ content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
195
+ const cleanSrc = cleanSrcPath(src);
196
+ const cleanAlt = cleanAltText(alt || 'Figure');
197
+ hasImages = true;
198
+
199
+ return createFigureComponent(cleanSrc, cleanAlt);
200
+ });
201
+
202
+ // Transform images with captions (Notion sometimes adds captions as separate text)
203
+ content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)\s*\n\s*([^\n]+)/g, (match, alt, src, caption) => {
204
+ const cleanSrc = cleanSrcPath(src);
205
+ const cleanAlt = cleanAltText(alt || 'Figure');
206
+ const cleanCap = cleanCaption(caption);
207
+ hasImages = true;
208
+
209
+ return createFigureComponent(cleanSrc, cleanAlt, cleanCap);
210
+ });
211
+
212
+ if (hasImages) {
213
+ console.log(' ✅ Figure components with imports will be created');
214
+ }
215
+
216
+ return content;
217
+ }
218
+
219
+ /**
220
+ * Transform Notion callouts to Note components
221
+ * @param {string} content - MDX content
222
+ * @returns {string} - Content with Note components
223
+ */
224
+ function transformCallouts(content) {
225
+ console.log(' 📝 Transforming callouts to Note components...');
226
+
227
+ let transformedCount = 0;
228
+
229
+ // Transform blockquotes that look like Notion callouts
230
+ content = content.replace(/^> \*\*([^*]+)\*\*\s*\n> (.+?)(?=\n> \*\*|\n\n|\n$)/gms, (match, title, content) => {
231
+ transformedCount++;
232
+ usedComponents.add('Note');
233
+
234
+ const cleanContent = content
235
+ .replace(/^> /gm, '') // Remove blockquote markers
236
+ .replace(/\n+/g, '\n') // Normalize newlines
237
+ .trim();
238
+
239
+ return `<Note type="${title.toLowerCase()}" title="${title}">\n${cleanContent}\n</Note>\n\n`;
240
+ });
241
+
242
+ if (transformedCount > 0) {
243
+ console.log(` ✅ Transformed ${transformedCount} callout(s) to Note components`);
244
+ }
245
+
246
+ return content;
247
+ }
248
+
249
+ /**
250
+ * Transform Notion databases/tables to enhanced table components
251
+ * @param {string} content - MDX content
252
+ * @returns {string} - Content with enhanced tables
253
+ */
254
+ function transformTables(content) {
255
+ console.log(' 📊 Enhancing tables...');
256
+
257
+ let enhancedCount = 0;
258
+
259
+ // Wrap tables in a container for better styling
260
+ content = content.replace(/^(\|[^|\n]+\|[\s\S]*?)(?=\n\n|\n$)/gm, (match) => {
261
+ if (match.includes('|') && match.split('\n').length > 2) {
262
+ enhancedCount++;
263
+ return `<div class="table-container">\n\n${match}\n\n</div>`;
264
+ }
265
+ return match;
266
+ });
267
+
268
+ if (enhancedCount > 0) {
269
+ console.log(` ✅ Enhanced ${enhancedCount} table(s)`);
270
+ }
271
+
272
+ return content;
273
+ }
274
+
275
+ /**
276
+ * Transform Notion code blocks to enhanced code components
277
+ * @param {string} content - MDX content
278
+ * @returns {string} - Content with enhanced code blocks
279
+ */
280
+ function transformCodeBlocks(content) {
281
+ console.log(' 💻 Enhancing code blocks...');
282
+
283
+ let enhancedCount = 0;
284
+
285
+ // Add copy functionality to code blocks
286
+ content = content.replace(/^```(\w+)\n([\s\S]*?)\n```$/gm, (match, lang, code) => {
287
+ enhancedCount++;
288
+ return `\`\`\`${lang} copy\n${code}\n\`\`\``;
289
+ });
290
+
291
+ if (enhancedCount > 0) {
292
+ console.log(` ✅ Enhanced ${enhancedCount} code block(s)`);
293
+ }
294
+
295
+ return content;
296
+ }
297
+
298
+ /**
299
+ * Fix Notion-specific formatting issues
300
+ * @param {string} content - MDX content
301
+ * @returns {string} - Content with fixed formatting
302
+ */
303
+ function fixNotionFormatting(content) {
304
+ console.log(' 🔧 Fixing Notion formatting issues...');
305
+
306
+ let fixedCount = 0;
307
+
308
+ // Fix Notion's toggle lists that don't convert well
309
+ content = content.replace(/^(\s*)•\s*(.+)$/gm, (match, indent, text) => {
310
+ fixedCount++;
311
+ return `${indent}- ${text}`;
312
+ });
313
+
314
+ // Fix Notion's numbered lists that might have issues
315
+ content = content.replace(/^(\s*)\d+\.\s*(.+)$/gm, (match, indent, text) => {
316
+ // Only fix if it's not already properly formatted
317
+ if (!text.includes('\n') || text.split('\n').length === 1) {
318
+ return match; // Keep as is
319
+ }
320
+ fixedCount++;
321
+ return `${indent}1. ${text}`;
322
+ });
323
+
324
+ // Fix Notion's bold/italic combinations
325
+ content = content.replace(/\*\*([^*]+)\*\*([^*]+)\*\*([^*]+)\*\*/g, (match, part1, part2, part3) => {
326
+ fixedCount++;
327
+ return `**${part1}${part2}${part3}**`;
328
+ });
329
+
330
+ if (fixedCount > 0) {
331
+ console.log(` ✅ Fixed ${fixedCount} formatting issue(s)`);
332
+ }
333
+
334
+ return content;
335
+ }
336
+
337
+ /**
338
+ * Ensure proper frontmatter for MDX with Notion metadata
339
+ * @param {string} content - MDX content
340
+ * @param {string} pageId - Notion page ID (optional)
341
+ * @param {string} notionToken - Notion API token (optional)
342
+ * @returns {string} - Content with proper frontmatter
343
+ */
344
+ async function ensureFrontmatter(content, pageId = null, notionToken = null) {
345
+ console.log(' 📄 Ensuring proper frontmatter...');
346
+
347
+ if (!content.startsWith('---')) {
348
+ let frontmatter;
349
+
350
+ if (pageId && notionToken) {
351
+ try {
352
+ console.log(' 🔍 Extracting Notion metadata...');
353
+ frontmatter = await extractAndGenerateNotionFrontmatter(pageId, notionToken);
354
+ console.log(' ✅ Generated rich frontmatter from Notion');
355
+ } catch (error) {
356
+ console.log(' ⚠️ Failed to extract Notion metadata, using basic frontmatter');
357
+ frontmatter = generateBasicFrontmatter();
358
+ }
359
+ } else {
360
+ frontmatter = generateBasicFrontmatter();
361
+ console.log(' ✅ Generated basic frontmatter');
362
+ }
363
+
364
+ return frontmatter + content;
365
+ }
366
+
367
+ // Parse existing frontmatter and enhance it
368
+ try {
369
+ const { data, content: body } = matter(content);
370
+
371
+ // If we have Notion metadata available, try to enhance the frontmatter
372
+ if (pageId && notionToken && (!data.notion_id || data.notion_id !== pageId)) {
373
+ try {
374
+ console.log(' 🔍 Enhancing frontmatter with Notion metadata...');
375
+ const notionFrontmatter = await extractAndGenerateNotionFrontmatter(pageId, notionToken);
376
+ const { data: notionData } = matter(notionFrontmatter);
377
+
378
+ // Merge Notion metadata with existing frontmatter
379
+ const enhancedData = { ...data, ...notionData };
380
+ const enhancedContent = matter.stringify(body, enhancedData);
381
+ console.log(' ✅ Enhanced frontmatter with Notion metadata');
382
+ return enhancedContent;
383
+ } catch (error) {
384
+ console.log(' ⚠️ Could not enhance with Notion metadata, keeping existing');
385
+ }
386
+ }
387
+
388
+ // Ensure required fields
389
+ if (!data.title) data.title = 'Notion Article';
390
+ if (!data.published) data.published = new Date().toISOString().split('T')[0];
391
+ if (!data.tableOfContentsAutoCollapse) data.tableOfContentsAutoCollapse = true;
392
+
393
+ const enhancedContent = matter.stringify(body, data);
394
+ console.log(' ✅ Enhanced existing frontmatter');
395
+ return enhancedContent;
396
+ } catch (error) {
397
+ console.log(' ⚠️ Could not parse frontmatter, keeping as is');
398
+ return content;
399
+ }
400
+ }
401
+
402
+ /**
403
+ * Generate basic frontmatter
404
+ * @returns {string} - Basic frontmatter
405
+ */
406
+ function generateBasicFrontmatter() {
407
+ const currentDate = new Date().toLocaleDateString('en-US', {
408
+ year: 'numeric',
409
+ month: 'short',
410
+ day: '2-digit'
411
+ });
412
+ return `---
413
+ title: "Notion Article"
414
+ published: "${currentDate}"
415
+ tableOfContentsAutoCollapse: true
416
+ ---
417
+
418
+ `;
419
+ }
420
+
421
+ /**
422
+ * Main MDX processing function that applies all transformations
423
+ * @param {string} content - Raw Markdown content
424
+ * @param {string} pageId - Notion page ID (optional)
425
+ * @param {string} notionToken - Notion API token (optional)
426
+ * @returns {string} - Processed MDX content compatible with Astro
427
+ */
428
+ async function processMdxContent(content, pageId = null, notionToken = null) {
429
+ console.log('🔧 Processing for Astro MDX compatibility...');
430
+
431
+ // Clear previous tracking
432
+ usedComponents.clear();
433
+ imageImports.clear();
434
+
435
+ let processedContent = content;
436
+
437
+ // Apply each transformation step sequentially
438
+ processedContent = await ensureFrontmatter(processedContent, pageId, notionToken);
439
+ processedContent = fixNotionFormatting(processedContent);
440
+ processedContent = transformCallouts(processedContent);
441
+ processedContent = transformImages(processedContent);
442
+ processedContent = transformTables(processedContent);
443
+ processedContent = transformCodeBlocks(processedContent);
444
+
445
+ // Add component imports at the end
446
+ processedContent = addComponentImports(processedContent);
447
+
448
+ return processedContent;
449
+ }
450
+
451
+ /**
452
+ * Convert a single markdown file to MDX
453
+ * @param {string} inputFile - Input markdown file
454
+ * @param {string} outputDir - Output directory
455
+ * @param {string} pageId - Notion page ID (optional)
456
+ * @param {string} notionToken - Notion API token (optional)
457
+ */
458
+ async function convertFileToMdx(inputFile, outputDir, pageId = null, notionToken = null) {
459
+ const filename = basename(inputFile, '.md');
460
+ const outputFile = join(outputDir, `${filename}.mdx`);
461
+
462
+ console.log(`📝 Converting: ${basename(inputFile)} → ${basename(outputFile)}`);
463
+
464
+ try {
465
+ const markdownContent = readFileSync(inputFile, 'utf8');
466
+ const mdxContent = await processMdxContent(markdownContent, pageId, notionToken);
467
+ writeFileSync(outputFile, mdxContent);
468
+
469
+ console.log(` ✅ Converted: ${outputFile}`);
470
+
471
+ // Show file size
472
+ const inputSize = Math.round(markdownContent.length / 1024);
473
+ const outputSize = Math.round(mdxContent.length / 1024);
474
+ console.log(` 📊 Input: ${inputSize}KB → Output: ${outputSize}KB`);
475
+
476
+ } catch (error) {
477
+ console.error(` ❌ Failed to convert ${inputFile}: ${error.message}`);
478
+ }
479
+ }
480
+
481
+ /**
482
+ * Convert all markdown files in a directory to MDX
483
+ * @param {string} inputPath - Input path (file or directory)
484
+ * @param {string} outputDir - Output directory
485
+ * @param {string} pageId - Notion page ID (optional)
486
+ * @param {string} notionToken - Notion API token (optional)
487
+ */
488
+ async function convertToMdx(inputPath, outputDir, pageId = null, notionToken = null) {
489
+ console.log('📝 Notion Markdown to Astro MDX Converter');
490
+ console.log(`📁 Input: ${inputPath}`);
491
+ console.log(`📁 Output: ${outputDir}`);
492
+
493
+ // Check if input exists
494
+ if (!existsSync(inputPath)) {
495
+ console.error(`❌ Input not found: ${inputPath}`);
496
+ process.exit(1);
497
+ }
498
+
499
+ try {
500
+ // Ensure output directory exists
501
+ if (!existsSync(outputDir)) {
502
+ mkdirSync(outputDir, { recursive: true });
503
+ }
504
+
505
+ let filesToConvert = [];
506
+
507
+ if (statSync(inputPath).isDirectory()) {
508
+ // Convert all .md files in directory
509
+ const files = readdirSync(inputPath);
510
+ filesToConvert = files
511
+ .filter(file => file.endsWith('.md'))
512
+ .map(file => join(inputPath, file));
513
+ } else if (inputPath.endsWith('.md')) {
514
+ // Convert single file
515
+ filesToConvert = [inputPath];
516
+ } else {
517
+ console.error('❌ Input must be a .md file or directory containing .md files');
518
+ process.exit(1);
519
+ }
520
+
521
+ if (filesToConvert.length === 0) {
522
+ console.log('ℹ️ No .md files found to convert');
523
+ return;
524
+ }
525
+
526
+ console.log(`🔄 Found ${filesToConvert.length} file(s) to convert`);
527
+
528
+ // Convert each file
529
+ for (const file of filesToConvert) {
530
+ await convertFileToMdx(file, outputDir, pageId, notionToken);
531
+ }
532
+
533
+ console.log(`✅ Conversion completed! ${filesToConvert.length} file(s) processed`);
534
+
535
+ } catch (error) {
536
+ console.error('❌ Conversion failed:', error.message);
537
+ process.exit(1);
538
+ }
539
+ }
540
+
541
+ export { convertToMdx };
542
+
543
+ function main() {
544
+ const config = parseArgs();
545
+ convertToMdx(config.input, config.output);
546
+ console.log('🎉 MDX conversion completed!');
547
+ }
548
+
549
+ if (import.meta.url === `file://${process.argv[1]}`) {
550
+ main();
551
+ }
app/scripts/notion-importer/notion-converter.mjs ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { config } from 'dotenv';
4
+ import { Client } from '@notionhq/client';
5
+ import { NotionConverter } from 'notion-to-md';
6
+ import { DefaultExporter } from 'notion-to-md/plugins/exporter';
7
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
8
+ import { join, dirname, basename } from 'path';
9
+ import { fileURLToPath } from 'url';
10
+ import { postProcessMarkdown } from './post-processor.mjs';
11
+ import { createCustomCodeRenderer } from './custom-code-renderer.mjs';
12
+
13
+ // Load environment variables from .env file
14
+ config();
15
+
16
+ const __filename = fileURLToPath(import.meta.url);
17
+ const __dirname = dirname(__filename);
18
+
19
+ // Configuration
20
+ const DEFAULT_INPUT = join(__dirname, 'input', 'pages.json');
21
+ const DEFAULT_OUTPUT = join(__dirname, 'output');
22
+
23
+ function parseArgs() {
24
+ const args = process.argv.slice(2);
25
+ const config = {
26
+ input: DEFAULT_INPUT,
27
+ output: DEFAULT_OUTPUT,
28
+ clean: false,
29
+ token: process.env.NOTION_TOKEN
30
+ };
31
+
32
+ for (const arg of args) {
33
+ if (arg.startsWith('--input=')) {
34
+ config.input = arg.split('=')[1];
35
+ } else if (arg.startsWith('--output=')) {
36
+ config.output = arg.split('=')[1];
37
+ } else if (arg.startsWith('--token=')) {
38
+ config.token = arg.split('=')[1];
39
+ } else if (arg === '--clean') {
40
+ config.clean = true;
41
+ }
42
+ }
43
+
44
+ return config;
45
+ }
46
+
47
+ function ensureDirectory(dir) {
48
+ if (!existsSync(dir)) {
49
+ mkdirSync(dir, { recursive: true });
50
+ }
51
+ }
52
+
53
+ function loadPagesConfig(configFile) {
54
+ if (!existsSync(configFile)) {
55
+ console.error(`❌ Configuration file not found: ${configFile}`);
56
+ console.log('📝 Create a pages.json file with your Notion page IDs:');
57
+ console.log(`
58
+ {
59
+ "pages": [
60
+ {
61
+ "id": "your-notion-page-id-1",
62
+ "title": "Page Title 1",
63
+ "slug": "page-1"
64
+ },
65
+ {
66
+ "id": "your-notion-page-id-2",
67
+ "title": "Page Title 2",
68
+ "slug": "page-2"
69
+ }
70
+ ]
71
+ }
72
+ `);
73
+ process.exit(1);
74
+ }
75
+
76
+ try {
77
+ const config = JSON.parse(readFileSync(configFile, 'utf8'));
78
+ return config.pages || [];
79
+ } catch (error) {
80
+ console.error(`❌ Error reading configuration: ${error.message}`);
81
+ process.exit(1);
82
+ }
83
+ }
84
+
85
+ /**
86
+ * Convert a single Notion page to Markdown with advanced media handling
87
+ * @param {Object} notion - Notion client
88
+ * @param {string} pageId - Notion page ID
89
+ * @param {string} outputDir - Output directory
90
+ * @param {string} pageTitle - Page title for file naming
91
+ * @returns {Promise<string>} - Path to generated markdown file
92
+ */
93
+ async function convertNotionPage(notion, pageId, outputDir, pageTitle) {
94
+ console.log(`📄 Converting Notion page: ${pageTitle} (${pageId})`);
95
+
96
+ try {
97
+ // Create media directory for this page
98
+ const mediaDir = join(outputDir, 'media', pageId);
99
+ ensureDirectory(mediaDir);
100
+
101
+ // Configure the DefaultExporter to save to a file
102
+ const outputFile = join(outputDir, `${pageTitle}.md`);
103
+ const exporter = new DefaultExporter({
104
+ outputType: 'file',
105
+ outputPath: outputFile,
106
+ });
107
+
108
+ // Create the converter with media downloading strategy
109
+ const n2m = new NotionConverter(notion)
110
+ .withExporter(exporter)
111
+ // Download media to local directory with path transformation
112
+ .downloadMediaTo({
113
+ outputDir: mediaDir,
114
+ // Transform paths to be web-accessible
115
+ transformPath: (localPath) => `/media/${pageId}/${basename(localPath)}`,
116
+ });
117
+
118
+ // Convert the page
119
+ const result = await n2m.convert(pageId);
120
+
121
+ console.log(` ✅ Converted to: ${outputFile}`);
122
+ console.log(` 📊 Content length: ${result.content.length} characters`);
123
+ console.log(` 🖼️ Media saved to: ${mediaDir}`);
124
+
125
+ return outputFile;
126
+
127
+ } catch (error) {
128
+ console.error(` ❌ Failed to convert page ${pageId}: ${error.message}`);
129
+ throw error;
130
+ }
131
+ }
132
+
133
+ /**
134
+ * Process Notion pages with advanced configuration
135
+ * @param {string} inputFile - Path to pages configuration
136
+ * @param {string} outputDir - Output directory
137
+ * @param {string} notionToken - Notion API token
138
+ */
139
+ export async function convertNotionToMarkdown(inputFile, outputDir, notionToken) {
140
+ console.log('🚀 Notion to Markdown Converter');
141
+ console.log(`📁 Input: ${inputFile}`);
142
+ console.log(`📁 Output: ${outputDir}`);
143
+
144
+ // Validate Notion token
145
+ if (!notionToken) {
146
+ console.error('❌ NOTION_TOKEN not found. Please set it as environment variable or use --token=YOUR_TOKEN');
147
+ process.exit(1);
148
+ }
149
+
150
+ // Ensure output directory exists
151
+ ensureDirectory(outputDir);
152
+
153
+ try {
154
+ // Initialize Notion client
155
+ const notion = new Client({
156
+ auth: notionToken,
157
+ });
158
+
159
+ // Load pages configuration
160
+ const pages = loadPagesConfig(inputFile);
161
+ console.log(`📋 Found ${pages.length} page(s) to convert`);
162
+
163
+ const convertedFiles = [];
164
+
165
+ // Convert each page
166
+ for (const page of pages) {
167
+ try {
168
+ const outputFile = await convertNotionPage(
169
+ notion,
170
+ page.id,
171
+ outputDir,
172
+ page.slug || page.title?.toLowerCase().replace(/\s+/g, '-') || page.id
173
+ );
174
+ convertedFiles.push(outputFile);
175
+ } catch (error) {
176
+ console.error(`❌ Failed to convert page ${page.id}: ${error.message}`);
177
+ // Continue with other pages
178
+ }
179
+ }
180
+
181
+ // Post-process all converted files
182
+ console.log('🔧 Post-processing converted files...');
183
+ for (const file of convertedFiles) {
184
+ try {
185
+ let content = readFileSync(file, 'utf8');
186
+ content = postProcessMarkdown(content);
187
+ writeFileSync(file, content);
188
+ console.log(` ✅ Post-processed: ${basename(file)}`);
189
+ } catch (error) {
190
+ console.error(` ❌ Failed to post-process ${file}: ${error.message}`);
191
+ }
192
+ }
193
+
194
+ console.log(`✅ Conversion completed! ${convertedFiles.length} file(s) generated`);
195
+
196
+ } catch (error) {
197
+ console.error('❌ Conversion failed:', error.message);
198
+ process.exit(1);
199
+ }
200
+ }
201
+
202
+ function main() {
203
+ const config = parseArgs();
204
+
205
+ if (config.clean) {
206
+ console.log('🧹 Cleaning output directory...');
207
+ // Clean output directory logic would go here
208
+ }
209
+
210
+ convertNotionToMarkdown(config.input, config.output, config.token);
211
+ console.log('🎉 Notion conversion completed!');
212
+ }
213
+
214
+ // Show help if requested
215
+ if (process.argv.includes('--help') || process.argv.includes('-h')) {
216
+ console.log(`
217
+ 🚀 Notion to Markdown Converter
218
+
219
+ Usage:
220
+ node notion-converter.mjs [options]
221
+
222
+ Options:
223
+ --input=PATH Input pages configuration file (default: input/pages.json)
224
+ --output=PATH Output directory (default: output/)
225
+ --token=TOKEN Notion API token (or set NOTION_TOKEN env var)
226
+ --clean Clean output directory before conversion
227
+ --help, -h Show this help
228
+
229
+ Environment Variables:
230
+ NOTION_TOKEN Your Notion integration token
231
+
232
+ Examples:
233
+ # Basic conversion with environment token
234
+ NOTION_TOKEN=your_token node notion-converter.mjs
235
+
236
+ # Custom paths and token
237
+ node notion-converter.mjs --input=my-pages.json --output=converted/ --token=your_token
238
+
239
+ # Clean output first
240
+ node notion-converter.mjs --clean
241
+
242
+ Configuration File Format (pages.json):
243
+ {
244
+ "pages": [
245
+ {
246
+ "id": "your-notion-page-id",
247
+ "title": "Page Title",
248
+ "slug": "page-slug"
249
+ }
250
+ ]
251
+ }
252
+ `);
253
+ process.exit(0);
254
+ }
255
+
256
+ // Run CLI if called directly
257
+ if (import.meta.url === `file://${process.argv[1]}`) {
258
+ main();
259
+ }
app/scripts/notion-importer/notion-metadata-extractor.mjs ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ import { Client } from '@notionhq/client';
4
+
5
+ /**
6
+ * Notion Metadata Extractor
7
+ * Extracts document metadata from Notion pages for frontmatter generation
8
+ */
9
+
10
+ /**
11
+ * Extract metadata from Notion page
12
+ * @param {string} pageId - Notion page ID
13
+ * @param {string} notionToken - Notion API token
14
+ * @returns {object} - Extracted metadata object
15
+ */
16
+ export async function extractNotionMetadata(pageId, notionToken) {
17
+ const notion = new Client({
18
+ auth: notionToken,
19
+ });
20
+
21
+ const metadata = {};
22
+
23
+ try {
24
+ // Get page information
25
+ const page = await notion.pages.retrieve({ page_id: pageId });
26
+
27
+ // Extract title from page properties
28
+ if (page.properties.title && page.properties.title.title && page.properties.title.title.length > 0) {
29
+ metadata.title = page.properties.title.title[0].plain_text;
30
+ }
31
+
32
+ // Extract creation date
33
+ if (page.created_time) {
34
+ metadata.published = new Date(page.created_time).toLocaleDateString('en-US', {
35
+ year: 'numeric',
36
+ month: 'short',
37
+ day: '2-digit'
38
+ });
39
+ metadata.created_time = page.created_time;
40
+ }
41
+
42
+ // Extract last edited date
43
+ if (page.last_edited_time) {
44
+ metadata.last_edited_time = page.last_edited_time;
45
+ }
46
+
47
+ // Extract created by
48
+ if (page.created_by && page.created_by.id) {
49
+ metadata.created_by = page.created_by.id;
50
+ }
51
+
52
+ // Extract last edited by
53
+ if (page.last_edited_by && page.last_edited_by.id) {
54
+ metadata.last_edited_by = page.last_edited_by.id;
55
+ }
56
+
57
+ // Extract page URL
58
+ metadata.notion_url = page.url;
59
+
60
+ // Extract page ID
61
+ metadata.notion_id = page.id;
62
+
63
+ // Extract parent information
64
+ if (page.parent) {
65
+ metadata.parent = {
66
+ type: page.parent.type,
67
+ id: page.parent[page.parent.type]?.id || page.parent[page.parent.type]
68
+ };
69
+ }
70
+
71
+ // Extract cover image if available
72
+ if (page.cover) {
73
+ metadata.cover = {
74
+ type: page.cover.type,
75
+ url: page.cover[page.cover.type]?.url || page.cover[page.cover.type]
76
+ };
77
+ }
78
+
79
+ // Extract icon if available
80
+ if (page.icon) {
81
+ metadata.icon = {
82
+ type: page.icon.type,
83
+ emoji: page.icon.emoji,
84
+ url: page.icon.external?.url || page.icon.file?.url
85
+ };
86
+ }
87
+
88
+ // Extract authors and custom properties
89
+ const customProperties = {};
90
+ for (const [key, value] of Object.entries(page.properties)) {
91
+ if (key !== 'title') { // Skip title as it's handled separately
92
+ const extractedValue = extractPropertyValue(value);
93
+
94
+ // Check for author-related properties
95
+ if (key.toLowerCase().includes('author') ||
96
+ key.toLowerCase().includes('writer') ||
97
+ key.toLowerCase().includes('creator') ||
98
+ value.type === 'people') {
99
+ metadata.authors = extractedValue;
100
+ } else {
101
+ customProperties[key] = extractedValue;
102
+ }
103
+ }
104
+ }
105
+
106
+ // If no authors found in properties, try to get from created_by
107
+ if (!metadata.authors && page.created_by) {
108
+ try {
109
+ const user = await notion.users.retrieve({ user_id: page.created_by.id });
110
+ metadata.authors = [{
111
+ name: user.name || user.id,
112
+ id: user.id
113
+ }];
114
+ } catch (error) {
115
+ console.log(' ⚠️ Could not fetch author from created_by:', error.message);
116
+ // Fallback to basic info
117
+ metadata.authors = [{
118
+ name: page.created_by.name || page.created_by.id,
119
+ id: page.created_by.id
120
+ }];
121
+ }
122
+ }
123
+
124
+ if (Object.keys(customProperties).length > 0) {
125
+ metadata.properties = customProperties;
126
+ }
127
+
128
+ // Try to extract description from page content (first paragraph)
129
+ try {
130
+ const blocks = await notion.blocks.children.list({ block_id: pageId });
131
+ const firstParagraph = blocks.results.find(block =>
132
+ block.type === 'paragraph' &&
133
+ block.paragraph.rich_text &&
134
+ block.paragraph.rich_text.length > 0
135
+ );
136
+
137
+ if (firstParagraph) {
138
+ const description = firstParagraph.paragraph.rich_text
139
+ .map(text => text.plain_text)
140
+ .join('')
141
+ .trim();
142
+
143
+ if (description && description.length > 0) {
144
+ metadata.description = description.substring(0, 200) + (description.length > 200 ? '...' : '');
145
+ }
146
+ }
147
+ } catch (error) {
148
+ console.log(' ⚠️ Could not extract description from page content');
149
+ }
150
+
151
+ // Generate tags from page properties
152
+ const tags = [];
153
+ for (const [key, value] of Object.entries(page.properties)) {
154
+ if (value.type === 'multi_select' && value.multi_select) {
155
+ value.multi_select.forEach(option => {
156
+ tags.push(option.name);
157
+ });
158
+ } else if (value.type === 'select' && value.select) {
159
+ tags.push(value.select.name);
160
+ }
161
+ }
162
+
163
+ if (tags.length > 0) {
164
+ metadata.tags = tags;
165
+ }
166
+
167
+ } catch (error) {
168
+ console.error('Error extracting Notion metadata:', error.message);
169
+ // Return basic metadata if extraction fails
170
+ metadata.title = "Notion Article";
171
+ metadata.published = new Date().toLocaleDateString('en-US', {
172
+ year: 'numeric',
173
+ month: 'short',
174
+ day: '2-digit'
175
+ });
176
+ }
177
+
178
+ return metadata;
179
+ }
180
+
181
+ /**
182
+ * Extract value from Notion property
183
+ * @param {object} property - Notion property object
184
+ * @returns {any} - Extracted value
185
+ */
186
+ function extractPropertyValue(property) {
187
+ switch (property.type) {
188
+ case 'rich_text':
189
+ return property.rich_text.map(text => text.plain_text).join('');
190
+ case 'title':
191
+ return property.title.map(text => text.plain_text).join('');
192
+ case 'number':
193
+ return property.number;
194
+ case 'select':
195
+ return property.select?.name || null;
196
+ case 'multi_select':
197
+ return property.multi_select.map(option => option.name);
198
+ case 'date':
199
+ return property.date?.start || null;
200
+ case 'checkbox':
201
+ return property.checkbox;
202
+ case 'url':
203
+ return property.url;
204
+ case 'email':
205
+ return property.email;
206
+ case 'phone_number':
207
+ return property.phone_number;
208
+ case 'created_time':
209
+ return property.created_time;
210
+ case 'created_by':
211
+ return property.created_by?.id || null;
212
+ case 'last_edited_time':
213
+ return property.last_edited_time;
214
+ case 'last_edited_by':
215
+ return property.last_edited_by?.id || null;
216
+ case 'people':
217
+ return property.people.map(person => ({
218
+ name: person.name || person.id,
219
+ id: person.id
220
+ }));
221
+ default:
222
+ return null;
223
+ }
224
+ }
225
+
226
+ /**
227
+ * Generate YAML frontmatter from metadata object
228
+ * @param {object} metadata - Metadata object
229
+ * @returns {string} - YAML frontmatter string
230
+ */
231
+ export function generateNotionFrontmatter(metadata) {
232
+ let frontmatter = '---\n';
233
+
234
+ // Title
235
+ if (metadata.title) {
236
+ frontmatter += `title: "${metadata.title}"\n`;
237
+ }
238
+
239
+ // Description
240
+ if (metadata.description) {
241
+ frontmatter += `description: "${metadata.description}"\n`;
242
+ }
243
+
244
+ // Publication date
245
+ if (metadata.published) {
246
+ frontmatter += `published: "${metadata.published}"\n`;
247
+ }
248
+
249
+ // Authors
250
+ if (metadata.authors && metadata.authors.length > 0) {
251
+ frontmatter += 'authors:\n';
252
+ metadata.authors.forEach(author => {
253
+ if (typeof author === 'string') {
254
+ frontmatter += ` - name: "${author}"\n`;
255
+ } else if (author.name) {
256
+ frontmatter += ` - name: "${author.name}"\n`;
257
+ }
258
+ });
259
+ }
260
+
261
+ // Tags
262
+ if (metadata.tags && metadata.tags.length > 0) {
263
+ frontmatter += 'tags:\n';
264
+ metadata.tags.forEach(tag => {
265
+ frontmatter += ` - "${tag}"\n`;
266
+ });
267
+ }
268
+
269
+ // Notion metadata removed - keeping only standard frontmatter fields
270
+
271
+ // Cover image
272
+ if (metadata.cover && metadata.cover.url) {
273
+ frontmatter += `cover: "${metadata.cover.url}"\n`;
274
+ }
275
+
276
+ // Icon
277
+ if (metadata.icon) {
278
+ if (metadata.icon.emoji) {
279
+ frontmatter += `icon: "${metadata.icon.emoji}"\n`;
280
+ } else if (metadata.icon.url) {
281
+ frontmatter += `icon: "${metadata.icon.url}"\n`;
282
+ }
283
+ }
284
+
285
+ // Custom properties removed - keeping frontmatter clean and standard
286
+
287
+ // Default Astro configuration
288
+ frontmatter += 'tableOfContentsAutoCollapse: true\n';
289
+ frontmatter += '---\n\n';
290
+
291
+ return frontmatter;
292
+ }
293
+
294
+ /**
295
+ * Extract and generate frontmatter from Notion page
296
+ * @param {string} pageId - Notion page ID
297
+ * @param {string} notionToken - Notion API token
298
+ * @returns {string} - Complete YAML frontmatter
299
+ */
300
+ export async function extractAndGenerateNotionFrontmatter(pageId, notionToken) {
301
+ const metadata = await extractNotionMetadata(pageId, notionToken);
302
+ return generateNotionFrontmatter(metadata);
303
+ }
app/scripts/notion-importer/output/.temp-pages.json ADDED
Binary file (128 Bytes). View file
 
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8013-b668-f14bd1ac0ec0.png ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8014-834f-d700b623256b.png ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-801d-841a-e35011491566.png ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8031-ac8d-c5678af1bdd5.png ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8048-9b7e-db4fa7485915.png ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-804d-bd0a-e0b1c15e504f.png ADDED
app/scripts/notion-importer/output/media/27877f1c9c9d804d9c82f7b3905578ff/image_27877f1c-9c9d-8075-ae2e-dc24fe9296ca.png ADDED