This commit introduces a comprehensive Astro integration that automatically generates discovery files for websites.

Features:
- robots.txt with LLM bot support (Anthropic-AI, GPTBot, etc.)
- llms.txt for AI assistant context and instructions
- humans.txt for team credits and site information
- Automatic sitemap integration via @astrojs/sitemap

Technical Details:
- TypeScript implementation with full type safety
- Configurable HTTP caching headers
- Custom template support for all generated files
- Sensible defaults with extensive customization options
- Date-based versioning (2025.11.03)

Testing:
- 34 unit tests covering all generators
- Test coverage for robots.txt, llms.txt, and humans.txt
- Integration with Vitest

Documentation:
- Comprehensive README with examples
- API reference documentation
- Contributing guidelines
- Example configurations (minimal and full)
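For context, wiring the integration into a project might look like the sketch below. The package name ("astro-discovery") and the option grouping are illustrative assumptions based on the feature list above, not the integration's documented API; llmBots and crawlDelay do appear in the robots generator's options per the tests that follow.

// astro.config.mjs — illustrative sketch only; "astro-discovery" and the
// option grouping are assumptions, not the integration's actual API.
import { defineConfig } from 'astro/config';
import sitemap from '@astrojs/sitemap';
import discovery from 'astro-discovery'; // hypothetical package name

export default defineConfig({
  site: 'https://example.com',
  integrations: [
    sitemap(),
    discovery({
      robots: {
        llmBots: { enabled: true }, // allow Anthropic-AI, GPTBot, etc.
        crawlDelay: 5,
      },
      llms: { enabled: true },   // emit /llms.txt
      humans: { enabled: true }, // emit /humans.txt
    }),
  ],
});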
95 lines · 2.3 KiB · TypeScript
import { describe, it, expect } from 'vitest';

import { generateRobotsTxt } from '../src/generators/robots.js';

describe('generateRobotsTxt', () => {
  const testURL = new URL('https://example.com');

  it('generates basic robots.txt with defaults', () => {
    const result = generateRobotsTxt({}, testURL);

    expect(result).toContain('User-agent: *');
    expect(result).toContain('Allow: /');
    expect(result).toContain('Sitemap: https://example.com/sitemap-index.xml');
  });
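
  // For reference, the full default output asserted above might look like
  // this (an assumed shape inferred from the substrings checked; the real
  // generator may order or group lines differently):
  //
  //   User-agent: *
  //   Allow: /
  //
  //   Sitemap: https://example.com/sitemap-index.xml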

  it('includes LLM bots when enabled', () => {
    const result = generateRobotsTxt({ llmBots: { enabled: true } }, testURL);

    expect(result).toContain('Anthropic-AI');
    expect(result).toContain('GPTBot');
    expect(result).toContain('Claude-Web');
    expect(result).toContain('Allow: /llms.txt');
  });
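
  // The LLM-bot section these assertions target might look like this
  // (assumed; only the substrings checked above are guaranteed by the test):
  //
  //   User-agent: Anthropic-AI
  //   User-agent: GPTBot
  //   User-agent: Claude-Web
  //   Allow: /llms.txt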

  it('excludes LLM bots when disabled', () => {
    const result = generateRobotsTxt({ llmBots: { enabled: false } }, testURL);

    expect(result).not.toContain('Anthropic-AI');
    expect(result).not.toContain('GPTBot');
  });

  it('respects custom crawl delay', () => {
    const result = generateRobotsTxt({ crawlDelay: 5 }, testURL);

    expect(result).toContain('Crawl-delay: 5');
  });

  it('includes custom agents', () => {
    const result = generateRobotsTxt(
      {
        additionalAgents: [
          {
            userAgent: 'CustomBot',
            allow: ['/api'],
            disallow: ['/admin'],
          },
        ],
      },
      testURL
    );

    expect(result).toContain('User-agent: CustomBot');
    expect(result).toContain('Allow: /api');
    expect(result).toContain('Disallow: /admin');
  });

  it('includes custom rules', () => {
    const customRules = 'User-agent: SpecialBot\nCrawl-delay: 10';
    const result = generateRobotsTxt({ customRules }, testURL);

    expect(result).toContain(customRules);
  });

  it('allows custom LLM bot agents', () => {
    const result = generateRobotsTxt(
      {
        llmBots: {
          enabled: true,
          agents: ['CustomAI', 'AnotherBot'],
        },
      },
      testURL
    );

    expect(result).toContain('CustomAI');
    expect(result).toContain('AnotherBot');
  });

  it('ends with newline', () => {
    const result = generateRobotsTxt({}, testURL);
    expect(result.endsWith('\n')).toBe(true);
  });
});
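
To make the contract these tests pin down concrete, here is a minimal sketch of a generateRobotsTxt that would satisfy them. It is reverse-engineered from the assertions alone, not the actual src/generators/robots implementation; the option types, the default LLM agent list, and the line ordering are all assumptions.

// robots.ts — minimal sketch satisfying the tests above; not the real
// implementation. Types and defaults are assumptions inferred from the tests.
interface AgentRule {
  userAgent: string;
  allow?: string[];
  disallow?: string[];
}

interface RobotsOptions {
  llmBots?: { enabled: boolean; agents?: string[] };
  crawlDelay?: number;
  additionalAgents?: AgentRule[];
  customRules?: string;
}

// Assumed default LLM crawler list (the tests only check these three names).
const DEFAULT_LLM_AGENTS = ['Anthropic-AI', 'GPTBot', 'Claude-Web'];

export function generateRobotsTxt(options: RobotsOptions, site: URL): string {
  const lines: string[] = ['User-agent: *', 'Allow: /'];

  if (options.crawlDelay !== undefined) {
    lines.push(`Crawl-delay: ${options.crawlDelay}`);
  }

  if (options.llmBots?.enabled) {
    const agents = options.llmBots.agents ?? DEFAULT_LLM_AGENTS;
    lines.push('');
    for (const agent of agents) {
      lines.push(`User-agent: ${agent}`);
    }
    lines.push('Allow: /llms.txt');
  }

  for (const agent of options.additionalAgents ?? []) {
    lines.push('', `User-agent: ${agent.userAgent}`);
    for (const path of agent.allow ?? []) lines.push(`Allow: ${path}`);
    for (const path of agent.disallow ?? []) lines.push(`Disallow: ${path}`);
  }

  if (options.customRules) {
    lines.push('', options.customRules);
  }

  // Sitemap location matches the @astrojs/sitemap default (sitemap-index.xml).
  lines.push('', `Sitemap: ${new URL('sitemap-index.xml', site).href}`);

  // The final test requires a trailing newline.
  return lines.join('\n') + '\n';
}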