astro-discovery/tests/robots.test.ts
Ryan Malloy d25dde4627 feat: initial implementation of @astrojs/discovery integration
This commit introduces a comprehensive Astro integration that automatically
generates discovery files for websites:

Features:
- robots.txt with LLM bot support (Anthropic-AI, GPTBot, etc.)
- llms.txt for AI assistant context and instructions
- humans.txt for team credits and site information
- Automatic sitemap integration via @astrojs/sitemap

Technical Details:
- TypeScript implementation with full type safety
- Configurable HTTP caching headers
- Custom template support for all generated files
- Sensible defaults with extensive customization options
- Date-based versioning (2025.11.03)

Testing:
- 34 unit tests covering all generators
- Test coverage for robots.txt, llms.txt, and humans.txt
- Integration with Vitest

Documentation:
- Comprehensive README with examples
- API reference documentation
- Contributing guidelines
- Example configurations (minimal and full)
2025-11-03 07:36:39 -07:00

95 lines
2.3 KiB
TypeScript

import { describe, it, expect } from 'vitest';
import { generateRobotsTxt } from '../src/generators/robots.js';
describe('generateRobotsTxt', () => {
  // Every case renders against the same site origin; only options vary.
  const siteURL = new URL('https://example.com');

  it('generates basic robots.txt with defaults', () => {
    const output = generateRobotsTxt({}, siteURL);

    // Defaults: allow every crawler everywhere and advertise the sitemap index.
    expect(output).toContain('User-agent: *');
    expect(output).toContain('Allow: /');
    expect(output).toContain('Sitemap: https://example.com/sitemap-index.xml');
  });

  it('includes LLM bots when enabled', () => {
    const output = generateRobotsTxt({ llmBots: { enabled: true } }, siteURL);

    // The built-in AI crawler list should appear, plus access to llms.txt.
    for (const agent of ['Anthropic-AI', 'GPTBot', 'Claude-Web']) {
      expect(output).toContain(agent);
    }
    expect(output).toContain('Allow: /llms.txt');
  });

  it('excludes LLM bots when disabled', () => {
    const output = generateRobotsTxt({ llmBots: { enabled: false } }, siteURL);

    // Disabling must suppress the AI crawler entries entirely.
    expect(output).not.toContain('Anthropic-AI');
    expect(output).not.toContain('GPTBot');
  });

  it('respects custom crawl delay', () => {
    const output = generateRobotsTxt({ crawlDelay: 5 }, siteURL);
    expect(output).toContain('Crawl-delay: 5');
  });

  it('includes custom agents', () => {
    // A user-supplied agent should emit its own User-agent section.
    const options = {
      additionalAgents: [
        { userAgent: 'CustomBot', allow: ['/api'], disallow: ['/admin'] },
      ],
    };
    const output = generateRobotsTxt(options, siteURL);

    expect(output).toContain('User-agent: CustomBot');
    expect(output).toContain('Allow: /api');
    expect(output).toContain('Disallow: /admin');
  });

  it('includes custom rules', () => {
    // Free-form rules are passed through verbatim, newlines included.
    const customRules = 'User-agent: SpecialBot\nCrawl-delay: 10';
    const output = generateRobotsTxt({ customRules }, siteURL);
    expect(output).toContain(customRules);
  });

  it('allows custom LLM bot agents', () => {
    // Overriding the agent list should replace the defaults with these names.
    const options = {
      llmBots: { enabled: true, agents: ['CustomAI', 'AnotherBot'] },
    };
    const output = generateRobotsTxt(options, siteURL);

    expect(output).toContain('CustomAI');
    expect(output).toContain('AnotherBot');
  });

  it('ends with newline', () => {
    // robots.txt convention: file terminates with a trailing newline.
    expect(generateRobotsTxt({}, siteURL).endsWith('\n')).toBe(true);
  });
});