This commit introduces a comprehensive Astro integration that automatically generates discovery files for websites.

Features:
- robots.txt with LLM bot support (Anthropic-AI, GPTBot, etc.)
- llms.txt for AI assistant context and instructions
- humans.txt for team credits and site information
- Automatic sitemap integration via @astrojs/sitemap

Technical Details:
- TypeScript implementation with full type safety
- Configurable HTTP caching headers
- Custom template support for all generated files
- Sensible defaults with extensive customization options
- Date-based versioning (2025.11.03)

Testing:
- 34 unit tests covering all generators
- Test coverage for robots.txt, llms.txt, and humans.txt
- Integration with Vitest

Documentation:
- Comprehensive README with examples
- API reference documentation
- Contributing guidelines
- Example configurations (minimal and full)
179 lines
4.9 KiB
TypeScript
179 lines
4.9 KiB
TypeScript
import { defineConfig } from 'astro/config';
import discovery from '@astrojs/discovery';
// Example configuration showing all available options.
//
// Multi-line text values (customRules, instructions, story) are built with
// `[...].join('\n')` rather than indented template literals, so the emitted
// text does not pick up leading whitespace from the source indentation.
export default defineConfig({
  site: 'https://example.com',

  integrations: [
    discovery({
      // Robots.txt configuration
      robots: {
        crawlDelay: 2,
        allowAllBots: true,
        llmBots: {
          enabled: true,
          // Default bots are included, add custom ones here
          agents: [
            'Anthropic-AI',
            'Claude-Web',
            'GPTBot',
            'ChatGPT-User',
            'CustomBot',
          ],
        },
        // Per-agent allow/disallow rules
        additionalAgents: [
          {
            userAgent: 'BadBot',
            disallow: ['/'],
          },
          {
            userAgent: 'GoodBot',
            allow: ['/api'],
            disallow: ['/admin'],
          },
        ],
        // Raw robots.txt text supplied as-is
        customRules: [
          '# Custom rules',
          'User-agent: SpecialBot',
          'Crawl-delay: 10',
        ].join('\n'),
      },

      // LLMs.txt configuration
      llms: {
        description: 'Your site description for AI assistants',
        keyFeatures: [
          'Feature 1',
          'Feature 2',
          'Feature 3',
        ],
        importantPages: [
          {
            name: 'Documentation',
            path: '/docs',
            description: 'Complete API documentation',
          },
          {
            name: 'Blog',
            path: '/blog',
            description: 'Latest articles and tutorials',
          },
        ],
        instructions: [
          'When helping users with our site:',
          '1. Check documentation first at /docs',
          '2. Use provided API endpoints',
          '3. Follow brand guidelines',
          '4. Be helpful and accurate',
        ].join('\n'),
        apiEndpoints: [
          {
            path: '/api/chat',
            method: 'POST',
            description: 'Chat endpoint for conversations',
          },
          {
            path: '/api/search',
            method: 'GET',
            description: 'Search API for content',
          },
        ],
        techStack: {
          frontend: ['Astro', 'TypeScript', 'React'],
          backend: ['Node.js', 'FastAPI'],
          ai: ['Claude', 'GPT-4'],
          other: ['Docker', 'PostgreSQL'],
        },
        brandVoice: [
          'Professional yet friendly',
          'Technical but accessible',
          'Focus on practical examples',
        ],
        // Keys are section titles, values are the section text
        customSections: {
          'Contact': 'For support, email support@example.com',
        },
      },

      // Humans.txt configuration
      humans: {
        team: [
          {
            name: 'Jane Doe',
            role: 'Creator & Developer',
            contact: 'jane@example.com',
            location: 'San Francisco, CA',
            twitter: '@janedoe',
            github: 'janedoe',
          },
          {
            name: 'John Smith',
            role: 'Designer',
            contact: 'john@example.com',
            location: 'New York, NY',
          },
        ],
        thanks: [
          'The Astro team for amazing tools',
          'Open source community',
          'Coffee ☕',
        ],
        site: {
          lastUpdate: 'auto', // or specific date like '2025-11-03'
          language: 'English',
          doctype: 'HTML5',
          ide: 'VS Code',
          techStack: ['Astro', 'TypeScript', 'React', 'Tailwind CSS'],
          standards: ['HTML5', 'CSS3', 'ES2022'],
          components: ['Astro Components', 'React Components'],
          software: ['Node.js', 'TypeScript', 'Git'],
        },
        story: [
          'This project started when we realized there was a need for better',
          'discovery mechanisms on the web. We wanted to make it easy for',
          'search engines, AI assistants, and humans to understand what our',
          'site is about and how to interact with it.',
        ].join('\n'),
        funFacts: [
          'Built with love and coffee',
          'Over 100 commits in the first week',
          'Designed with accessibility in mind',
        ],
        philosophy: [
          'Make the web more discoverable',
          'Embrace open standards',
          'Build with the future in mind',
        ],
        customSections: {
          'SUSTAINABILITY': 'This site is carbon neutral and hosted on green servers.',
        },
      },

      // Sitemap configuration (passed to @astrojs/sitemap)
      sitemap: {
        // Keep a page only if its URL contains none of the excluded segments
        filter: (page) =>
          !page.includes('/admin') &&
          !page.includes('/draft') &&
          !page.includes('/private'),
        changefreq: 'weekly',
        priority: 0.7,
      },

      // HTTP caching configuration (in seconds)
      caching: {
        robots: 3600, // 1 hour
        llms: 3600, // 1 hour
        humans: 86400, // 24 hours
        sitemap: 3600, // 1 hour
      },

      // Custom templates (optional)
      // templates: {
      //   robots: (config, siteURL) => `Your custom robots.txt content`,
      //   llms: (config, siteURL) => `Your custom llms.txt content`,
      //   humans: (config, siteURL) => `Your custom humans.txt content`,
      // },
    }),
  ],
});