# @astrojs/discovery - Implementation Guide > Technical implementation details for building the Astro discovery integration ## Package Structure ``` @astrojs/discovery/ ├── package.json ├── README.md ├── LICENSE ├── tsconfig.json ├── src/ │ ├── index.ts # Main entry point │ ├── types.ts # TypeScript definitions │ ├── generators/ │ │ ├── robots.ts # robots.txt generation │ │ ├── llms.ts # llms.txt generation │ │ ├── humans.ts # humans.txt generation │ │ └── utils.ts # Shared utilities │ ├── templates/ │ │ ├── robots.template.ts │ │ ├── llms.template.ts │ │ └── humans.template.ts │ └── validators/ │ └── config.ts # Config validation ├── dist/ # Built output └── tests/ ├── robots.test.ts ├── llms.test.ts ├── humans.test.ts └── integration.test.ts ``` ## Core Implementation ### 1. Main Integration File (`src/index.ts`) ```typescript import type { AstroIntegration } from 'astro'; import type { DiscoveryConfig } from './types'; import sitemap from '@astrojs/sitemap'; import { generateRobotsTxt } from './generators/robots'; import { generateLLMsTxt } from './generators/llms'; import { generateHumansTxt } from './generators/humans'; import { validateConfig } from './validators/config'; export default function discovery( userConfig: DiscoveryConfig = {} ): AstroIntegration { // Merge with defaults const config = validateConfig(userConfig); return { name: '@astrojs/discovery', hooks: { 'astro:config:setup': ({ config: astroConfig, injectRoute, updateConfig }) => { // Ensure site is configured if (!astroConfig.site) { throw new Error( '@astrojs/discovery requires `site` to be set in astro.config.mjs' ); } // Add sitemap integration updateConfig({ integrations: [ sitemap(config.sitemap || {}) ] }); // Inject dynamic routes for discovery files if (config.robots?.enabled !== false) { injectRoute({ pattern: '/robots.txt', entrypoint: '@astrojs/discovery/routes/robots.ts', prerender: true }); } if (config.llms?.enabled !== false) { injectRoute({ pattern: '/llms.txt', entrypoint: '@astrojs/discovery/routes/llms.ts', prerender: true }); } if (config.humans?.enabled !== false) { injectRoute({ pattern: '/humans.txt', entrypoint: '@astrojs/discovery/routes/humans.ts', prerender: true }); } }, 'astro:build:done': ({ dir, routes }) => { // Post-build validation console.log('✅ Discovery files generated:'); if (config.robots?.enabled !== false) console.log(' - /robots.txt'); if (config.llms?.enabled !== false) console.log(' - /llms.txt'); if (config.humans?.enabled !== false) console.log(' - /humans.txt'); console.log(' - /sitemap-index.xml'); } } }; } // Named exports export type { DiscoveryConfig } from './types'; ``` ### 2. Type Definitions (`src/types.ts`) ```typescript export interface DiscoveryConfig { robots?: RobotsConfig; llms?: LLMsConfig; humans?: HumansConfig; sitemap?: SitemapConfig; caching?: CachingConfig; templates?: TemplateConfig; } export interface RobotsConfig { enabled?: boolean; crawlDelay?: number; allowAllBots?: boolean; llmBots?: { enabled?: boolean; agents?: string[]; }; additionalAgents?: Array<{ userAgent: string; allow?: string[]; disallow?: string[]; }>; customRules?: string; } export interface LLMsConfig { enabled?: boolean; description?: string | (() => string); keyFeatures?: string[]; importantPages?: ImportantPage[] | (() => Promise); instructions?: string; apiEndpoints?: APIEndpoint[]; techStack?: TechStack; brandVoice?: string[]; customSections?: Record; } export interface HumansConfig { enabled?: boolean; team?: TeamMember[]; thanks?: string[]; site?: SiteInfo; story?: string; funFacts?: string[]; philosophy?: string[]; customSections?: Record; } export interface SitemapConfig { filter?: (page: string) => boolean; customPages?: string[]; changefreq?: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never'; priority?: number; } export interface CachingConfig { robots?: number; llms?: number; humans?: number; sitemap?: number; } export interface TemplateConfig { robots?: (config: RobotsConfig, siteURL: URL) => string; llms?: (config: LLMsConfig, siteURL: URL) => string; humans?: (config: HumansConfig, siteURL: URL) => string; } export interface ImportantPage { name: string; path: string; description?: string; } export interface APIEndpoint { path: string; method?: string; description: string; } export interface TechStack { frontend?: string[]; backend?: string[]; ai?: string[]; other?: string[]; } export interface TeamMember { name: string; role?: string; contact?: string; location?: string; twitter?: string; github?: string; } export interface SiteInfo { lastUpdate?: string | 'auto'; language?: string; doctype?: string; ide?: string; techStack?: string[]; standards?: string[]; components?: string[]; software?: string[]; } ``` ### 3. Robots.txt Generator (`src/generators/robots.ts`) ```typescript import type { RobotsConfig } from '../types'; const DEFAULT_LLM_BOTS = [ 'Anthropic-AI', 'Claude-Web', 'GPTBot', 'ChatGPT-User', 'cohere-ai', 'Google-Extended' ]; export function generateRobotsTxt( config: RobotsConfig, siteURL: URL ): string { const lines: string[] = []; // Allow all bots by default if (config.allowAllBots !== false) { lines.push('User-agent: *'); lines.push('Allow: /'); lines.push(''); } // Add sitemap lines.push('# Sitemaps'); lines.push(`Sitemap: ${new URL('sitemap-index.xml', siteURL).href}`); lines.push(''); // LLM-specific rules if (config.llmBots?.enabled !== false) { lines.push('# LLM-specific resources'); lines.push('# See: https://github.com/anthropics/llm-txt'); const agents = config.llmBots?.agents || DEFAULT_LLM_BOTS; agents.forEach(agent => { lines.push(`User-agent: ${agent}`); }); lines.push('Allow: /llms.txt'); lines.push(''); } // Additional agent rules if (config.additionalAgents) { config.additionalAgents.forEach(agent => { lines.push(`User-agent: ${agent.userAgent}`); if (agent.allow) { agent.allow.forEach(path => { lines.push(`Allow: ${path}`); }); } if (agent.disallow) { agent.disallow.forEach(path => { lines.push(`Disallow: ${path}`); }); } lines.push(''); }); } // Crawl delay if (config.crawlDelay) { lines.push('# Crawl delay (be nice to our server)'); lines.push(`Crawl-delay: ${config.crawlDelay}`); lines.push(''); } // Custom rules if (config.customRules) { lines.push('# Custom rules'); lines.push(config.customRules); lines.push(''); } return lines.join('\n'); } ``` ### 4. LLMs.txt Generator (`src/generators/llms.ts`) ```typescript import type { LLMsConfig, ImportantPage } from '../types'; export async function generateLLMsTxt( config: LLMsConfig, siteURL: URL ): Promise { const lines: string[] = []; // Header const description = typeof config.description === 'function' ? config.description() : config.description; lines.push(`# ${siteURL.hostname}`); if (description) { lines.push(''); lines.push(`> ${description}`); } lines.push(''); lines.push('---'); lines.push(''); // Site Information lines.push('## Site Information'); lines.push(''); lines.push(`- **URL**: ${siteURL.href}`); if (description) { lines.push(`- **Description**: ${description}`); } lines.push(''); // Key Features if (config.keyFeatures && config.keyFeatures.length > 0) { lines.push('## Key Features'); lines.push(''); config.keyFeatures.forEach(feature => { lines.push(`- ${feature}`); }); lines.push(''); } // Important Pages if (config.importantPages) { const pages = typeof config.importantPages === 'function' ? await config.importantPages() : config.importantPages; if (pages.length > 0) { lines.push('## Important Pages'); lines.push(''); pages.forEach(page => { const url = new URL(page.path, siteURL).href; lines.push(`- **${page.name}**: ${url}`); if (page.description) { lines.push(` ${page.description}`); } }); lines.push(''); } } // Instructions for AI Assistants if (config.instructions) { lines.push('## For AI Assistants'); lines.push(''); lines.push(config.instructions); lines.push(''); } // API Endpoints if (config.apiEndpoints && config.apiEndpoints.length > 0) { lines.push('## API Endpoints'); lines.push(''); config.apiEndpoints.forEach(endpoint => { const method = endpoint.method || 'GET'; lines.push(`- \`${method} ${endpoint.path}\` - ${endpoint.description}`); }); lines.push(''); } // Tech Stack if (config.techStack) { lines.push('## Technical Stack'); lines.push(''); if (config.techStack.frontend) { lines.push(`- **Frontend**: ${config.techStack.frontend.join(', ')}`); } if (config.techStack.backend) { lines.push(`- **Backend**: ${config.techStack.backend.join(', ')}`); } if (config.techStack.ai) { lines.push(`- **AI**: ${config.techStack.ai.join(', ')}`); } if (config.techStack.other) { lines.push(`- **Other**: ${config.techStack.other.join(', ')}`); } lines.push(''); } // Brand Voice if (config.brandVoice && config.brandVoice.length > 0) { lines.push('## Brand Voice'); lines.push(''); config.brandVoice.forEach(item => { lines.push(`- ${item}`); }); lines.push(''); } // Custom Sections if (config.customSections) { Object.entries(config.customSections).forEach(([title, content]) => { lines.push(`## ${title}`); lines.push(''); lines.push(content); lines.push(''); }); } // Footer lines.push('---'); lines.push(''); lines.push(`Last Updated: ${new Date().toISOString().split('T')[0]}`); return lines.join('\n'); } ``` ### 5. Humans.txt Generator (`src/generators/humans.ts`) ```typescript import type { HumansConfig } from '../types'; export function generateHumansTxt(config: HumansConfig): string { const lines: string[] = []; // Team section if (config.team && config.team.length > 0) { lines.push('/* TEAM */'); lines.push(''); config.team.forEach((member, index) => { if (index > 0) lines.push(''); lines.push(`Name: ${member.name}`); if (member.role) lines.push(`Role: ${member.role}`); if (member.contact) lines.push(`Contact: ${member.contact}`); if (member.location) lines.push(`From: ${member.location}`); if (member.twitter) lines.push(`Twitter: ${member.twitter}`); if (member.github) lines.push(`GitHub: ${member.github}`); }); lines.push(''); } // Thanks section if (config.thanks && config.thanks.length > 0) { lines.push('/* THANKS */'); lines.push(''); config.thanks.forEach(thanks => { lines.push(`- ${thanks}`); }); lines.push(''); } // Site section if (config.site) { lines.push('/* SITE */'); lines.push(''); const lastUpdate = config.site.lastUpdate === 'auto' ? new Date().toISOString().split('T')[0] : config.site.lastUpdate; if (lastUpdate) lines.push(`Last update: ${lastUpdate}`); if (config.site.language) lines.push(`Language: ${config.site.language}`); if (config.site.doctype) lines.push(`Doctype: ${config.site.doctype}`); if (config.site.ide) lines.push(`IDE: ${config.site.ide}`); if (config.site.techStack) { lines.push(`Tech Stack: ${config.site.techStack.join(', ')}`); } if (config.site.standards) { lines.push(`Standards: ${config.site.standards.join(', ')}`); } if (config.site.components) { lines.push(`Components: ${config.site.components.join(', ')}`); } if (config.site.software) { lines.push(`Software: ${config.site.software.join(', ')}`); } lines.push(''); } // Story section if (config.story) { lines.push('/* THE STORY */'); lines.push(''); lines.push(config.story); lines.push(''); } // Fun Facts section if (config.funFacts && config.funFacts.length > 0) { lines.push('/* FUN FACTS */'); lines.push(''); config.funFacts.forEach(fact => { lines.push(`- ${fact}`); }); lines.push(''); } // Philosophy section if (config.philosophy && config.philosophy.length > 0) { lines.push('/* PHILOSOPHY */'); lines.push(''); config.philosophy.forEach(item => { lines.push(`"${item}"`); }); lines.push(''); } // Custom sections if (config.customSections) { Object.entries(config.customSections).forEach(([title, content]) => { lines.push(`/* ${title.toUpperCase()} */`); lines.push(''); lines.push(content); lines.push(''); }); } return lines.join('\n'); } ``` ### 6. API Route Template (`routes/robots.ts`) ```typescript import type { APIRoute } from 'astro'; import { generateRobotsTxt } from '../generators/robots'; import { getConfig } from '../config'; export const GET: APIRoute = ({ site }) => { const config = getConfig(); const siteURL = site || new URL('http://localhost:4321'); const content = config.templates?.robots ? config.templates.robots(config.robots, siteURL) : generateRobotsTxt(config.robots, siteURL); return new Response(content, { status: 200, headers: { 'Content-Type': 'text/plain; charset=utf-8', 'Cache-Control': `public, max-age=${config.caching?.robots || 3600}`, }, }); }; ``` ## Testing Strategy ### Unit Tests ```typescript // tests/robots.test.ts import { describe, it, expect } from 'vitest'; import { generateRobotsTxt } from '../src/generators/robots'; describe('generateRobotsTxt', () => { it('generates basic robots.txt', () => { const result = generateRobotsTxt({}, new URL('https://example.com')); expect(result).toContain('User-agent: *'); expect(result).toContain('Sitemap: https://example.com/sitemap-index.xml'); }); it('includes LLM bots when enabled', () => { const result = generateRobotsTxt( { llmBots: { enabled: true } }, new URL('https://example.com') ); expect(result).toContain('Anthropic-AI'); expect(result).toContain('GPTBot'); }); it('respects custom crawl delay', () => { const result = generateRobotsTxt( { crawlDelay: 5 }, new URL('https://example.com') ); expect(result).toContain('Crawl-delay: 5'); }); }); ``` ### Integration Tests ```typescript // tests/integration.test.ts import { describe, it, expect } from 'vitest'; import { testIntegration } from '@astrojs/test-utils'; import discovery from '../src/index'; describe('discovery integration', () => { it('generates all discovery files', async () => { const fixture = await testIntegration({ integrations: [discovery()], site: 'https://example.com' }); const files = await fixture.readdir('dist'); expect(files).toContain('robots.txt'); expect(files).toContain('llms.txt'); expect(files).toContain('humans.txt'); expect(files).toContain('sitemap-index.xml'); }); }); ``` ## Build & Publish ### package.json ```json { "name": "@astrojs/discovery", "version": "1.0.0", "description": "Complete discovery integration for Astro", "type": "module", "exports": { ".": "./dist/index.js", "./routes/*": "./dist/routes/*" }, "files": [ "dist", "README.md" ], "scripts": { "build": "tsc", "test": "vitest", "prepublishOnly": "npm run build && npm test" }, "peerDependencies": { "astro": "^5.0.0" }, "dependencies": { "@astrojs/sitemap": "^3.6.0" }, "devDependencies": { "@astrojs/test-utils": "^1.0.0", "typescript": "^5.3.0", "vitest": "^1.0.0" }, "keywords": [ "astro", "astro-integration", "robots", "sitemap", "llms", "humans", "discovery", "seo" ] } ``` ## Future Enhancements 1. **security.txt Support** - Add RFC 9116 security.txt generation 2. **ads.txt Support** - For sites with advertising 3. **manifest.json Support** - PWA manifest generation 4. **RSS Feed Integration** - Optional RSS feed generation 5. **OpenGraph Tags** - Meta tag injection 6. **Structured Data** - JSON-LD schema.org markup 7. **Analytics Integration** - Built-in analytics discovery 8. **i18n Support** - Multi-language discovery files ## Resources - [Astro Integration API](https://docs.astro.build/en/reference/integrations-reference/) - [humanstxt.org](https://humanstxt.org/) - [robots.txt spec](https://developers.google.com/search/docs/crawling-indexing/robots/intro) - [llms.txt proposal](https://github.com/anthropics/llm-txt) --- **This integration is a proposal. Implementation details may vary based on Astro's API evolution.**