11import { GatsbyNode } from 'gatsby' ;
22import * as path from 'path' ;
33import * as fs from 'fs' ;
4+ import languageInfo from '../../src/data/languages/languageInfo' ;
45
56/**
67 * This script is used to create a file called llms.txt that contains a list of all the pages in the site.
@@ -11,6 +12,26 @@ const LLMS_TXT_PREAMBLE = `# https://ably.com/docs llms.txt\n`;
1112
1213const REPORTER_PREFIX = 'onPostBuild:' ;
1314
/**
 * Language keys for which a language-specific (`?lang=<key>`) entry is generated.
 * Languages detected on a page that are not listed here are filtered out.
 * Declared read-only so the shared module-level list cannot be mutated by accident.
 */
const VALID_LANGUAGES: readonly string[] = [
  'javascript',
  'nodejs',
  'csharp',
  'flutter',
  'java',
  'objc',
  'php',
  'python',
  'ruby',
  'swift',
  'go',
];
29+
/**
 * Returns the display label for a language key.
 *
 * @param languageKey - Key to look up in `languageInfo`.
 * @returns The `label` of the matching `languageInfo` entry, or `languageKey`
 *   itself when there is no such entry or its label is empty.
 */
const getLanguageLabel = (languageKey: string): string => {
  const entry = languageInfo[languageKey as keyof typeof languageInfo];
  // `||` rather than `??`: an empty label should also fall back to the raw key.
  return entry?.label || languageKey;
};
34+
1435interface DocumentQueryResult {
1536 site : {
1637 siteMetadata : {
@@ -24,6 +45,7 @@ interface DocumentQueryResult {
2445 meta : {
2546 title : string ;
2647 meta_description : string ;
48+ languages ?: string [ ] ;
2749 } ;
2850 } ;
2951 } [ ] ;
@@ -38,6 +60,9 @@ interface DocumentQueryResult {
3860 title ?: string ;
3961 meta_description ?: string ;
4062 } ;
63+ internal : {
64+ contentFilePath ?: string ;
65+ } ;
4166 } [ ] ;
4267 } ;
4368}
@@ -53,6 +78,34 @@ const escapeMarkdown = (text: string) => {
5378 return text . replace ( / ( [ \\ ` * _ { } [ \] ( ) # + ! ] ) / g, '\\$1' ) ;
5479} ;
5580
/**
 * Collects the language identifiers of the fenced code blocks in a file.
 *
 * Every run of word characters that directly follows three backticks is
 * recorded once (for example `javascript` in a fence opened with three
 * backticks followed by `javascript`).
 *
 * @param filePath - Path of the file to scan.
 * @returns The distinct identifiers found. The set is empty when the path is
 *   empty, the file does not exist, or reading it fails (read failures other
 *   than a missing file are logged to the console).
 */
const extractCodeLanguages = async (filePath: string): Promise<Set<string>> => {
  // An empty path means there is nothing to read.
  if (!filePath) {
    return new Set();
  }

  try {
    // Non-blocking read: callers run this for many files inside Promise.all,
    // which only overlaps the work when the read itself is asynchronous.
    const fileContent = await fs.promises.readFile(filePath, 'utf8');

    const languages = new Set<string>();
    for (const match of fileContent.matchAll(/```(\w+)/g)) {
      if (match[1]) {
        languages.add(match[1]);
      }
    }
    return languages;
  } catch (error: unknown) {
    const code = typeof error === 'object' && error !== null ? (error as { code?: unknown }).code : undefined;
    // A missing file is an expected case and yields an empty result without noise.
    if (code !== 'ENOENT' && code !== 'ENOTDIR') {
      console.error(`Error extracting code element classes from ${filePath}:`, error);
    }
    return new Set();
  }
};
108+
56109export const onPostBuild : GatsbyNode [ 'onPostBuild' ] = async ( { graphql, reporter, basePath } ) => {
57110 const query = `
58111 query {
@@ -62,13 +115,14 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter
62115 }
63116 }
64117
65- allFileHtml(filter: { articleType: { in: ["document", "apiReference"] } }) {
118+ allFileHtml {
66119 edges {
67120 node {
68121 slug
69122 meta {
70123 title
71124 meta_description
125+ languages
72126 }
73127 }
74128 }
@@ -86,6 +140,9 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter
86140 title
87141 meta_description
88142 }
143+ internal {
144+ contentFilePath
145+ }
89146 }
90147 }
91148 }
@@ -109,30 +166,50 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter
109166 throw new Error ( 'Site URL not found.' ) ;
110167 }
111168
112- // Process textile-based pages (allFileHtml)
113- const textilePages = queryRecords . allFileHtml . edges . map ( ( edge ) => edge . node ) ;
114-
115- // Process MDX pages (allMdx)
116- const mdxPages = queryRecords . allMdx . nodes
117- . filter ( ( node ) => {
118- // Only include pages from docs directory that have the required frontmatter
119- return (
120- node . parent . relativeDirectory . startsWith ( 'docs' ) &&
121- node . frontmatter ?. title &&
122- node . frontmatter ?. meta_description
123- ) ;
124- } )
125- . map ( ( node ) => ( {
126- // Create slug from parent file info - remove 'docs/' prefix since it's already in relativeDirectory
127- slug : ( node . parent . relativeDirectory + ( node . parent . name === 'index' ? '' : `/${ node . parent . name } ` ) ) . replace (
128- / ^ d o c s \/ / ,
129- '' ,
130- ) ,
131- meta : {
132- title : node . frontmatter . title ! ,
133- meta_description : node . frontmatter . meta_description ! ,
134- } ,
135- } ) ) ;
169+ // Process textile-based pages (allFileHtml) and extract languages
170+ const textilePages = queryRecords . allFileHtml . edges . map ( ( edge ) => {
171+ // Extract valid languages from the meta.languages field
172+ const metaLanguages = edge . node . meta . languages || [ ] ;
173+ const languages = metaLanguages . filter ( ( lang ) => VALID_LANGUAGES . includes ( lang ) ) ;
174+
175+ return {
176+ ...edge . node ,
177+ languages,
178+ } ;
179+ } ) ;
180+
181+ // Process MDX pages (allMdx) and extract languages from files
182+ const mdxPages = await Promise . all (
183+ queryRecords . allMdx . nodes
184+ . filter ( ( node ) => {
185+ // Only include pages from docs directory that have the required frontmatter
186+ return (
187+ node . parent . relativeDirectory . startsWith ( 'docs' ) &&
188+ node . frontmatter ?. title &&
189+ node . frontmatter ?. meta_description
190+ ) ;
191+ } )
192+ . map ( async ( node ) => {
193+ // Create slug from parent file info - remove 'docs/' prefix since it's already in relativeDirectory
194+ const slug = (
195+ node . parent . relativeDirectory + ( node . parent . name === 'index' ? '' : `/${ node . parent . name } ` )
196+ ) . replace ( / ^ d o c s \/ / , '' ) ;
197+
198+ // Extract valid languages from the file content
199+ const filePath = node . internal . contentFilePath || '' ;
200+ const detectedLanguages = await extractCodeLanguages ( filePath ) ;
201+ const languages = Array . from ( detectedLanguages ) . filter ( ( lang ) => VALID_LANGUAGES . includes ( lang ) ) ;
202+
203+ return {
204+ slug,
205+ meta : {
206+ title : node . frontmatter . title ! ,
207+ meta_description : node . frontmatter . meta_description ! ,
208+ } ,
209+ languages,
210+ } ;
211+ } ) ,
212+ ) ;
136213
137214 const allPages = [ ...textilePages , ...mdxPages ] ;
138215
@@ -143,15 +220,27 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter
143220 const serializedPages = [ LLMS_TXT_PREAMBLE ] ;
144221
145222 for ( const page of allPages ) {
146- const { slug, meta } = page ;
223+ const { slug, meta, languages } = page ;
147224 const { title, meta_description } = meta ;
148225
149226 try {
150- const url = prefixPath ( { url : `/docs/${ slug } ` , siteUrl, pathPrefix : basePath } ) ;
227+ const baseUrl = prefixPath ( { url : `/docs/${ slug } ` , siteUrl, pathPrefix : basePath } ) ;
151228 const safeTitle = escapeMarkdown ( title ) ;
152- const link = `[${ safeTitle } ](${ url } )` ;
153- const line = `- ${ [ link , meta_description ] . join ( ': ' ) } ` ;
154- serializedPages . push ( line ) ;
229+
230+ // Generate base page entry (without language parameter)
231+ const baseLink = `[${ safeTitle } ](${ baseUrl } )` ;
232+ const baseLine = `- ${ [ baseLink , meta_description ] . join ( ': ' ) } ` ;
233+ serializedPages . push ( baseLine ) ;
234+
235+ // Generate language-specific entries if the page has languages
236+ if ( languages && languages . length > 0 ) {
237+ for ( const language of languages ) {
238+ const langUrl = `${ baseUrl } ?lang=${ language } ` ;
239+ const langLink = `[${ safeTitle } (${ getLanguageLabel ( language ) } )](${ langUrl } )` ;
240+ const langLine = `- ${ [ langLink , meta_description ] . join ( ': ' ) } ` ;
241+ serializedPages . push ( langLine ) ;
242+ }
243+ }
155244 } catch ( err ) {
156245 reporter . panic ( `${ REPORTER_PREFIX } Error serializing pages` , err as Error ) ;
157246 }
0 commit comments