This commit introduces a comprehensive Astro integration that automatically generates discovery files for websites.

Features:
- robots.txt with LLM bot support (Anthropic-AI, GPTBot, etc.)
- llms.txt for AI assistant context and instructions
- humans.txt for team credits and site information
- Automatic sitemap integration via @astrojs/sitemap

Technical Details:
- TypeScript implementation with full type safety
- Configurable HTTP caching headers
- Custom template support for all generated files
- Sensible defaults with extensive customization options
- Date-based versioning (2025.11.03)

Testing:
- 34 unit tests covering all generators
- Test coverage for robots.txt, llms.txt, and humans.txt
- Integration with Vitest

Documentation:
- Comprehensive README with examples
- API reference documentation
- Contributing guidelines
- Example configurations (minimal and full)
17 KiB
17 KiB
@astrojs/discovery - Implementation Guide
Technical implementation details for building the Astro discovery integration
Package Structure
@astrojs/discovery/
├── package.json
├── README.md
├── LICENSE
├── tsconfig.json
├── src/
│   ├── index.ts                  # Main entry point
│   ├── types.ts                  # TypeScript definitions
│   ├── config.ts                 # getConfig() accessor imported by the routes
│   ├── generators/
│   │   ├── robots.ts             # robots.txt generation
│   │   ├── llms.ts               # llms.txt generation
│   │   ├── humans.ts             # humans.txt generation
│   │   └── utils.ts              # Shared utilities
│   ├── routes/
│   │   ├── robots.ts             # /robots.txt endpoint
│   │   ├── llms.ts               # /llms.txt endpoint
│   │   └── humans.ts             # /humans.txt endpoint
│   ├── templates/
│   │   ├── robots.template.ts
│   │   ├── llms.template.ts
│   │   └── humans.template.ts
│   └── validators/
│       └── config.ts             # Config validation
├── dist/                         # Built output
└── tests/
    ├── robots.test.ts
    ├── llms.test.ts
    ├── humans.test.ts
    └── integration.test.ts
Core Implementation
1. Main Integration File (src/index.ts)
import type { AstroIntegration } from 'astro';
import type { DiscoveryConfig } from './types';
import sitemap from '@astrojs/sitemap';
import { generateRobotsTxt } from './generators/robots';
import { generateLLMsTxt } from './generators/llms';
import { generateHumansTxt } from './generators/humans';
import { validateConfig } from './validators/config';
/**
 * Astro integration that serves `/robots.txt`, `/llms.txt` and `/humans.txt`
 * as prerendered routes and registers `@astrojs/sitemap` alongside them.
 *
 * @param userConfig - Optional overrides. They are passed through
 *   `validateConfig` (defined elsewhere; presumably merges defaults) before use.
 * @returns The integration object to list under `integrations` in the Astro config.
 * @throws Error from the `astro:config:setup` hook when the Astro config has no `site`.
 */
export default function discovery(
  userConfig: DiscoveryConfig = {}
): AstroIntegration {
  // Merge with defaults
  const config = validateConfig(userConfig);

  // One entry per discovery file. A file is served unless its section sets
  // `enabled: false` explicitly, so an omitted section still gets a route.
  const discoveryRoutes = [
    {
      pattern: '/robots.txt',
      entrypoint: '@astrojs/discovery/routes/robots.ts',
      enabled: config.robots?.enabled !== false
    },
    {
      pattern: '/llms.txt',
      entrypoint: '@astrojs/discovery/routes/llms.ts',
      enabled: config.llms?.enabled !== false
    },
    {
      pattern: '/humans.txt',
      entrypoint: '@astrojs/discovery/routes/humans.ts',
      enabled: config.humans?.enabled !== false
    }
  ].filter(route => route.enabled);

  return {
    name: '@astrojs/discovery',
    hooks: {
      'astro:config:setup': ({ config: astroConfig, injectRoute, updateConfig }) => {
        // Absolute URLs (sitemap link, important pages) need a configured site.
        if (!astroConfig.site) {
          throw new Error(
            '@astrojs/discovery requires `site` to be set in astro.config.mjs'
          );
        }

        // Register the sitemap integration only when the project has not
        // already added it; a second copy would generate the sitemap twice.
        // In that case the project's own sitemap options win over `config.sitemap`.
        const hasSitemap = astroConfig.integrations?.some(
          integration => integration?.name === '@astrojs/sitemap'
        );
        if (!hasSitemap) {
          updateConfig({
            integrations: [
              sitemap(config.sitemap ?? {})
            ]
          });
        }

        // Inject a prerendered route for every enabled discovery file.
        for (const { pattern, entrypoint } of discoveryRoutes) {
          injectRoute({ pattern, entrypoint, prerender: true });
        }
      },
      'astro:build:done': () => {
        // Post-build summary of what was emitted.
        console.log('✅ Discovery files generated:');
        for (const { pattern } of discoveryRoutes) {
          console.log(` - ${pattern}`);
        }
        console.log(' - /sitemap-index.xml');
      }
    }
  };
}
// Named exports
export type { DiscoveryConfig } from './types';
2. Type Definitions (src/types.ts)
/**
 * Top-level options accepted by the `discovery()` integration.
 * Every section is optional.
 */
export interface DiscoveryConfig {
/** robots.txt settings; its route is injected unless `enabled` is `false`. */
robots?: RobotsConfig;
/** llms.txt settings; its route is injected unless `enabled` is `false`. */
llms?: LLMsConfig;
/** humans.txt settings; its route is injected unless `enabled` is `false`. */
humans?: HumansConfig;
/** Options handed to the `@astrojs/sitemap` integration. */
sitemap?: SitemapConfig;
/** Per-file cache lifetimes used for the `Cache-Control` header. */
caching?: CachingConfig;
/** Custom renderers that replace the built-in generators. */
templates?: TemplateConfig;
}
/** Settings consumed by `generateRobotsTxt`. */
export interface RobotsConfig {
/** Set to `false` to skip the `/robots.txt` route. */
enabled?: boolean;
/** Value written as `Crawl-delay`; omitted when unset or `0`. */
crawlDelay?: number;
/** Unless `false`, a `User-agent: *` / `Allow: /` group is written. */
allowAllBots?: boolean;
/** Group that points LLM crawlers at `/llms.txt`. */
llmBots?: {
/** Set to `false` to leave the LLM group out. */
enabled?: boolean;
/** User agents for the group; the built-in default list is used when unset. */
agents?: string[];
};
/** Extra groups, each written as one `User-agent` line plus its rules. */
additionalAgents?: Array<{
userAgent: string;
/** Paths written as `Allow:` lines. */
allow?: string[];
/** Paths written as `Disallow:` lines. */
disallow?: string[];
}>;
/** Raw text appended verbatim under a `# Custom rules` comment. */
customRules?: string;
}
/** Settings consumed by `generateLLMsTxt`. */
export interface LLMsConfig {
/** Set to `false` to skip the `/llms.txt` route. */
enabled?: boolean;
/** Site summary; a function is called synchronously at render time. */
description?: string | (() => string);
/** Bullets for the "Key Features" section. */
keyFeatures?: string[];
/** Pages for the "Important Pages" section; a function is awaited at render time. */
importantPages?: ImportantPage[] | (() => Promise<ImportantPage[]>);
/** Free text written under "For AI Assistants". */
instructions?: string;
/** Entries for the "API Endpoints" section. */
apiEndpoints?: APIEndpoint[];
/** Technologies listed under "Technical Stack". */
techStack?: TechStack;
/** Bullets for the "Brand Voice" section. */
brandVoice?: string[];
/** Extra sections: each key becomes a `##` heading, each value its body. */
customSections?: Record<string, string>;
}
/** Settings consumed by `generateHumansTxt`. */
export interface HumansConfig {
/** Set to `false` to skip the `/humans.txt` route. */
enabled?: boolean;
/** People listed in the TEAM section. */
team?: TeamMember[];
/** Bullets for the THANKS section. */
thanks?: string[];
/** Key/value facts for the SITE section. */
site?: SiteInfo;
/** Free text written under THE STORY. */
story?: string;
/** Bullets for the FUN FACTS section. */
funFacts?: string[];
/** Entries for the PHILOSOPHY section; each is wrapped in double quotes. */
philosophy?: string[];
/** Extra sections: each key is upper-cased into a header, each value is its body. */
customSections?: Record<string, string>;
}
/**
 * Options passed straight to `sitemap()` from `@astrojs/sitemap`.
 * NOTE(review): field semantics are defined by that package, not by this file —
 * confirm against its documentation.
 */
export interface SitemapConfig {
/** Presumably returns `true` for pages to keep in the sitemap — verify. */
filter?: (page: string) => boolean;
/** Presumably extra URLs to add to the sitemap — verify. */
customPages?: string[];
changefreq?: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never';
priority?: number;
}
/**
 * Cache lifetimes per generated file. The robots route writes its value as
 * `Cache-Control: public, max-age=<value>` and falls back to 3600.
 * The other fields are presumably used the same way by their routes — verify.
 */
export interface CachingConfig {
robots?: number;
llms?: number;
humans?: number;
sitemap?: number;
}
/**
 * Custom renderers. When one is set, the robots route calls it instead of the
 * built-in generator and serves the returned string as the file body; the
 * llms and humans routes presumably do the same — verify.
 */
export interface TemplateConfig {
robots?: (config: RobotsConfig, siteURL: URL) => string;
llms?: (config: LLMsConfig, siteURL: URL) => string;
humans?: (config: HumansConfig, siteURL: URL) => string;
}
/** One entry of the "Important Pages" section in llms.txt. */
export interface ImportantPage {
/** Label shown in bold. */
name: string;
/** Path resolved against the site URL to form an absolute link. */
path: string;
/** Optional line written beneath the link. */
description?: string;
}
/** One entry of the "API Endpoints" section in llms.txt. */
export interface APIEndpoint {
/** Endpoint path, written as given. */
path: string;
/** HTTP method; rendered as `GET` when omitted. */
method?: string;
/** Text shown after the method and path. */
description: string;
}
/** Technologies by category; each category is rendered as one comma-separated bullet in llms.txt. */
export interface TechStack {
frontend?: string[];
backend?: string[];
ai?: string[];
other?: string[];
}
/** One person in the TEAM section of humans.txt; optional fields are skipped when empty. */
export interface TeamMember {
/** Written as `Name:`. */
name: string;
/** Written as `Role:`. */
role?: string;
/** Written as `Contact:`. */
contact?: string;
/** Written as `From:`. */
location?: string;
/** Written as `Twitter:`. */
twitter?: string;
/** Written as `GitHub:`. */
github?: string;
}
/** Facts for the SITE section of humans.txt. */
export interface SiteInfo {
/**
 * Written as `Last update:`. The literal `'auto'` is replaced with the current
 * date (`YYYY-MM-DD`, taken from `toISOString()`); any other string is written as given.
 * To the type checker `string | 'auto'` is just `string`.
 */
lastUpdate?: string | 'auto';
/** Written as `Language:`. */
language?: string;
/** Written as `Doctype:`. */
doctype?: string;
/** Written as `IDE:`. */
ide?: string;
/** Written as `Tech Stack:`, comma-separated. */
techStack?: string[];
/** Written as `Standards:`, comma-separated. */
standards?: string[];
/** Written as `Components:`, comma-separated. */
components?: string[];
/** Written as `Software:`, comma-separated. */
software?: string[];
}
3. Robots.txt Generator (src/generators/robots.ts)
import type { RobotsConfig } from '../types';
/** User agents that receive the LLM group when `llmBots.agents` is not set. */
const DEFAULT_LLM_BOTS = [
  'Anthropic-AI',
  'Claude-Web',
  'GPTBot',
  'ChatGPT-User',
  'cohere-ai',
  'Google-Extended'
];

/**
 * Renders the body of robots.txt.
 *
 * Sections are written in this order: wildcard group (with the crawl delay),
 * sitemap link, LLM crawler group, additional agent groups, custom rules.
 * Every section ends with an empty line, so a non-empty result ends with `\n`.
 *
 * @param config - Robots settings; an empty object yields the defaults.
 * @param siteURL - Site origin used to build the absolute sitemap URL.
 * @returns The file content, lines joined with `\n`.
 */
export function generateRobotsTxt(
  config: RobotsConfig,
  siteURL: URL
): string {
  const lines: string[] = [];

  // Wildcard group. Crawl-delay is a group-level directive: written on its own
  // at the end of the file it belongs to no group (or to whichever group
  // happened to be last), so it is emitted inside the `*` group instead.
  const allowAll = config.allowAllBots !== false;
  if (allowAll || config.crawlDelay) {
    lines.push('User-agent: *');
    if (allowAll) {
      lines.push('Allow: /');
    }
    if (config.crawlDelay) {
      lines.push('# Crawl delay (be nice to our server)');
      lines.push(`Crawl-delay: ${config.crawlDelay}`);
    }
    lines.push('');
  }

  // Sitemap link (not tied to any group).
  lines.push('# Sitemaps');
  lines.push(`Sitemap: ${new URL('sitemap-index.xml', siteURL).href}`);
  lines.push('');

  // LLM-specific group. Skipped for an empty agent list: an `Allow` rule with
  // no preceding `User-agent` line is not a valid group.
  if (config.llmBots?.enabled !== false) {
    const agents = config.llmBots?.agents ?? DEFAULT_LLM_BOTS;
    if (agents.length > 0) {
      lines.push('# LLM-specific resources');
      lines.push('# See: https://github.com/anthropics/llm-txt');
      for (const agent of agents) {
        lines.push(`User-agent: ${agent}`);
      }
      lines.push('Allow: /llms.txt');
      lines.push('');
    }
  }

  // Additional agent groups: allow rules first, then disallow rules.
  for (const agent of config.additionalAgents ?? []) {
    lines.push(`User-agent: ${agent.userAgent}`);
    for (const path of agent.allow ?? []) {
      lines.push(`Allow: ${path}`);
    }
    for (const path of agent.disallow ?? []) {
      lines.push(`Disallow: ${path}`);
    }
    lines.push('');
  }

  // Custom rules are appended verbatim.
  if (config.customRules) {
    lines.push('# Custom rules');
    lines.push(config.customRules);
    lines.push('');
  }

  return lines.join('\n');
}
4. LLMs.txt Generator (src/generators/llms.ts)
import type { LLMsConfig, ImportantPage } from '../types';
/**
 * Renders the body of llms.txt as Markdown.
 *
 * Optional sections (key features, important pages, instructions, API
 * endpoints, tech stack, brand voice, custom sections) are written only when
 * they have content. The footer carries the current date as `YYYY-MM-DD`.
 *
 * @param config - LLMs settings. A `description` function is called
 *   synchronously; an `importantPages` function is awaited.
 * @param siteURL - Site URL; supplies the title host name and the base for page links.
 * @returns The file content, lines joined with `\n`.
 */
export async function generateLLMsTxt(
  config: LLMsConfig,
  siteURL: URL
): Promise<string> {
  const lines: string[] = [];

  // Header
  const description = typeof config.description === 'function'
    ? config.description()
    : config.description;
  lines.push(`# ${siteURL.hostname}`);
  if (description) {
    lines.push('', `> ${description}`);
  }
  lines.push('', '---', '');

  // Site Information
  lines.push('## Site Information', '');
  lines.push(`- **URL**: ${siteURL.href}`);
  if (description) {
    lines.push(`- **Description**: ${description}`);
  }
  lines.push('');

  // Key Features
  if (config.keyFeatures && config.keyFeatures.length > 0) {
    lines.push('## Key Features', '');
    for (const feature of config.keyFeatures) {
      lines.push(`- ${feature}`);
    }
    lines.push('');
  }

  // Important Pages
  if (config.importantPages) {
    const pages = typeof config.importantPages === 'function'
      ? await config.importantPages()
      : config.importantPages;
    if (pages.length > 0) {
      lines.push('## Important Pages', '');
      for (const page of pages) {
        // Paths are resolved against the site URL so links are absolute.
        const url = new URL(page.path, siteURL).href;
        lines.push(`- **${page.name}**: ${url}`);
        if (page.description) {
          lines.push(` ${page.description}`);
        }
      }
      lines.push('');
    }
  }

  // Instructions for AI Assistants
  if (config.instructions) {
    lines.push('## For AI Assistants', '');
    lines.push(config.instructions);
    lines.push('');
  }

  // API Endpoints
  if (config.apiEndpoints && config.apiEndpoints.length > 0) {
    lines.push('## API Endpoints', '');
    for (const endpoint of config.apiEndpoints) {
      const method = endpoint.method || 'GET';
      lines.push(`- \`${method} ${endpoint.path}\` - ${endpoint.description}`);
    }
    lines.push('');
  }

  // Tech Stack: empty categories are skipped, and the whole section is left
  // out when no category has entries, matching the other list sections.
  if (config.techStack) {
    const categories: Array<[string, string[] | undefined]> = [
      ['Frontend', config.techStack.frontend],
      ['Backend', config.techStack.backend],
      ['AI', config.techStack.ai],
      ['Other', config.techStack.other]
    ];
    const stackLines = categories.flatMap(([label, items]) =>
      items && items.length > 0 ? [`- **${label}**: ${items.join(', ')}`] : []
    );
    if (stackLines.length > 0) {
      lines.push('## Technical Stack', '');
      lines.push(...stackLines);
      lines.push('');
    }
  }

  // Brand Voice
  if (config.brandVoice && config.brandVoice.length > 0) {
    lines.push('## Brand Voice', '');
    for (const item of config.brandVoice) {
      lines.push(`- ${item}`);
    }
    lines.push('');
  }

  // Custom Sections: key is the heading, value is the body.
  if (config.customSections) {
    for (const [title, content] of Object.entries(config.customSections)) {
      lines.push(`## ${title}`, '');
      lines.push(content);
      lines.push('');
    }
  }

  // Footer
  lines.push('---', '');
  lines.push(`Last Updated: ${new Date().toISOString().split('T')[0]}`);

  return lines.join('\n');
}
5. Humans.txt Generator (src/generators/humans.ts)
import type { HumansConfig } from '../types';
/**
 * Renders the body of humans.txt.
 *
 * Sections are written in this order: TEAM, THANKS, SITE, THE STORY,
 * FUN FACTS, PHILOSOPHY, then custom sections. A section with nothing to show
 * is left out, so an empty config yields an empty string.
 *
 * @param config - Humans settings.
 * @returns The file content, lines joined with `\n`.
 */
export function generateHumansTxt(config: HumansConfig): string {
  const lines: string[] = [];

  // Team section: members are separated by an empty line.
  if (config.team && config.team.length > 0) {
    lines.push('/* TEAM */', '');
    config.team.forEach((member, index) => {
      if (index > 0) lines.push('');
      lines.push(`Name: ${member.name}`);
      if (member.role) lines.push(`Role: ${member.role}`);
      if (member.contact) lines.push(`Contact: ${member.contact}`);
      if (member.location) lines.push(`From: ${member.location}`);
      if (member.twitter) lines.push(`Twitter: ${member.twitter}`);
      if (member.github) lines.push(`GitHub: ${member.github}`);
    });
    lines.push('');
  }

  // Thanks section
  if (config.thanks && config.thanks.length > 0) {
    lines.push('/* THANKS */', '');
    for (const thanks of config.thanks) {
      lines.push(`- ${thanks}`);
    }
    lines.push('');
  }

  // Site section: collected first so that an empty `site` object, or one with
  // only empty lists, does not leave a bare header or a `Label: ` line.
  if (config.site) {
    const site = config.site;
    const siteLines: string[] = [];

    // 'auto' stands for the current date (YYYY-MM-DD).
    const lastUpdate = site.lastUpdate === 'auto'
      ? new Date().toISOString().split('T')[0]
      : site.lastUpdate;
    if (lastUpdate) siteLines.push(`Last update: ${lastUpdate}`);
    if (site.language) siteLines.push(`Language: ${site.language}`);
    if (site.doctype) siteLines.push(`Doctype: ${site.doctype}`);
    if (site.ide) siteLines.push(`IDE: ${site.ide}`);

    const lists: Array<[string, string[] | undefined]> = [
      ['Tech Stack', site.techStack],
      ['Standards', site.standards],
      ['Components', site.components],
      ['Software', site.software]
    ];
    for (const [label, items] of lists) {
      if (items && items.length > 0) {
        siteLines.push(`${label}: ${items.join(', ')}`);
      }
    }

    if (siteLines.length > 0) {
      lines.push('/* SITE */', '');
      lines.push(...siteLines);
      lines.push('');
    }
  }

  // Story section
  if (config.story) {
    lines.push('/* THE STORY */', '');
    lines.push(config.story);
    lines.push('');
  }

  // Fun Facts section
  if (config.funFacts && config.funFacts.length > 0) {
    lines.push('/* FUN FACTS */', '');
    for (const fact of config.funFacts) {
      lines.push(`- ${fact}`);
    }
    lines.push('');
  }

  // Philosophy section: each entry is quoted.
  if (config.philosophy && config.philosophy.length > 0) {
    lines.push('/* PHILOSOPHY */', '');
    for (const item of config.philosophy) {
      lines.push(`"${item}"`);
    }
    lines.push('');
  }

  // Custom sections: the key is upper-cased into the header.
  if (config.customSections) {
    for (const [title, content] of Object.entries(config.customSections)) {
      lines.push(`/* ${title.toUpperCase()} */`, '');
      lines.push(content);
      lines.push('');
    }
  }

  return lines.join('\n');
}
6. API Route Template (src/routes/robots.ts)
import type { APIRoute } from 'astro';
import { generateRobotsTxt } from '../generators/robots';
import { getConfig } from '../config';
/**
 * Endpoint for `/robots.txt`.
 *
 * Renders the body with the custom `templates.robots` renderer when one is
 * configured, otherwise with `generateRobotsTxt`, and serves it as UTF-8 plain
 * text with a public `Cache-Control` lifetime.
 */
export const GET: APIRoute = ({ site }) => {
  const config = getConfig();

  // Fall back to the local dev address when no site URL is provided.
  const siteURL = site ?? new URL('http://localhost:4321');

  // `robots` is optional in DiscoveryConfig; the generator dereferences it,
  // so hand it an empty object rather than `undefined`.
  const robotsConfig = config.robots ?? {};

  const content = config.templates?.robots
    ? config.templates.robots(robotsConfig, siteURL)
    : generateRobotsTxt(robotsConfig, siteURL);

  // `??` rather than `||`: an explicit 0 (no caching) must not become 3600.
  const maxAge = config.caching?.robots ?? 3600;

  return new Response(content, {
    status: 200,
    headers: {
      'Content-Type': 'text/plain; charset=utf-8',
      'Cache-Control': `public, max-age=${maxAge}`,
    },
  });
};
Testing Strategy
Unit Tests
// tests/robots.test.ts
import { describe, it, expect } from 'vitest';
import { generateRobotsTxt } from '../src/generators/robots';
describe('generateRobotsTxt', () => {
  // Renders against a fresh example.com URL for every case.
  const render = (options: Parameters<typeof generateRobotsTxt>[0]) =>
    generateRobotsTxt(options, new URL('https://example.com'));

  it('generates basic robots.txt', () => {
    const output = render({});

    // Defaults: wildcard group plus an absolute sitemap link.
    expect(output).toContain('User-agent: *');
    expect(output).toContain('Sitemap: https://example.com/sitemap-index.xml');
  });

  it('includes LLM bots when enabled', () => {
    const output = render({ llmBots: { enabled: true } });

    // Two agents from the built-in default list.
    expect(output).toContain('Anthropic-AI');
    expect(output).toContain('GPTBot');
  });

  it('respects custom crawl delay', () => {
    const output = render({ crawlDelay: 5 });

    expect(output).toContain('Crawl-delay: 5');
  });
});
Integration Tests
// tests/integration.test.ts
import { describe, it, expect } from 'vitest';
import { testIntegration } from '@astrojs/test-utils';
import discovery from '../src/index';
// End-to-end check: build a fixture with the integration enabled and confirm
// every discovery file is present in the output directory.
// NOTE(review): `testIntegration` comes from `@astrojs/test-utils`, which is not
// shown here — confirm the package exists and that the fixture exposes `readdir`.
describe('discovery integration', () => {
it('generates all discovery files', async () => {
// `site` is required: the integration throws during setup without it.
const fixture = await testIntegration({
integrations: [discovery()],
site: 'https://example.com'
});
const files = await fixture.readdir('dist');
// The three text files come from the injected routes.
expect(files).toContain('robots.txt');
expect(files).toContain('llms.txt');
expect(files).toContain('humans.txt');
// The sitemap index is presumably written by @astrojs/sitemap — verify.
expect(files).toContain('sitemap-index.xml');
});
});
Build & Publish
package.json
{
"name": "@astrojs/discovery",
"version": "1.0.0",
"description": "Complete discovery integration for Astro",
"type": "module",
"exports": {
".": "./dist/index.js",
"./routes/*": "./dist/routes/*"
},
"files": [
"dist",
"README.md"
],
"scripts": {
"build": "tsc",
"test": "vitest",
"prepublishOnly": "npm run build && npm test"
},
"peerDependencies": {
"astro": "^5.0.0"
},
"dependencies": {
"@astrojs/sitemap": "^3.6.0"
},
"devDependencies": {
"@astrojs/test-utils": "^1.0.0",
"typescript": "^5.3.0",
"vitest": "^1.0.0"
},
"keywords": [
"astro",
"astro-integration",
"robots",
"sitemap",
"llms",
"humans",
"discovery",
"seo"
]
}
Future Enhancements
- security.txt Support - Add RFC 9116 security.txt generation
- ads.txt Support - For sites with advertising
- manifest.json Support - PWA manifest generation
- RSS Feed Integration - Optional RSS feed generation
- OpenGraph Tags - Meta tag injection
- Structured Data - JSON-LD schema.org markup
- Analytics Integration - Built-in analytics discovery
- i18n Support - Multi-language discovery files
Resources
This integration is a proposal. Implementation details may vary based on Astro's API evolution.