148 lines
3.6 KiB
TypeScript
148 lines
3.6 KiB
TypeScript
/**
 * Web Content Analyzer Configuration
 *
 * This file contains configuration settings for the web content analyzer
 * feature, including credit costs and other operational parameters.
 */
|
|
|
|
export const webContentAnalyzerConfig = {
  /**
   * Credit cost for performing a web content analysis
   */
  creditsCost: 100,

  /**
   * Maximum content length for AI analysis (in characters).
   * Optimized to prevent token limit issues while maintaining quality.
   */
  maxContentLength: 8000,

  /**
   * Content truncation settings for performance optimization
   */
  contentTruncation: {
    /**
     * Preferred truncation point as a fraction of the max length (0.8 = 80%).
     * Try to truncate at sentence boundaries when possible.
     */
    preferredTruncationPoint: 0.8,

    /**
     * Minimum content length to consider for truncation
     */
    minContentLength: 1000,

    /**
     * Maximum number of sentences to preserve when truncating
     */
    maxSentences: 50,
  },

  /**
   * Request timeout in milliseconds
   */
  timeoutMillis: 55 * 1000, // 55 seconds

  /**
   * Performance optimization settings
   */
  performance: {
    /**
     * Debounce delay for URL input (in milliseconds)
     */
    urlInputDebounceMs: 500,

    /**
     * Image lazy loading threshold (intersection observer)
     */
    lazyLoadingThreshold: 0.1,

    /**
     * Maximum number of retry attempts for failed requests
     */
    maxRetryAttempts: 3,

    /**
     * Delay between retry attempts (in milliseconds)
     */
    retryDelayMs: 1000,
  },

  /**
   * Firecrawl API configuration and scraping options
   */
  firecrawl: {
    // API configuration.
    // The key is read from the FIRECRAWL_API_KEY environment variable when this
    // module is evaluated, so it is `undefined` if the variable is not set.
    apiKey: process.env.FIRECRAWL_API_KEY,
    baseUrl: 'https://api.firecrawl.dev',

    // Default scraping options
    formats: ['markdown', 'screenshot'],
    includeTags: ['title', 'meta', 'h1', 'h2', 'h3', 'p', 'article'],
    excludeTags: ['script', 'style', 'nav', 'footer', 'aside'],
    onlyMainContent: true,
    waitFor: 2000,

    // Screenshot optimization settings
    screenshot: {
      quality: 80, // Reduce quality for faster loading
      fullPage: false, // Only capture viewport for performance
    },

    // Rate limiting and timeout settings
    rateLimit: {
      maxConcurrentRequests: 3,
      requestDelay: 1000, // 1 second between requests
    },

    // Maximum content size (in characters)
    maxContentSize: 100000, // 100,000 characters (~100KB of text content)
  },

  /**
   * OpenAI analysis options
   */
  openai: {
    model: 'gpt-4o-mini',
    temperature: 0.1, // Low temperature for consistent results

    /**
     * Token optimization settings
     */
    maxTokens: 2000, // Limit response tokens for performance
  },
} as const;
|
|
|
|
/**
 * Get the credit cost for web content analysis.
 *
 * @returns The `creditsCost` value from `webContentAnalyzerConfig`.
 */
export function getWebContentAnalysisCost(): number {
  return webContentAnalyzerConfig.creditsCost;
}
|
|
|
|
/**
 * Validates if the Firecrawl API key is configured.
 *
 * A key that is missing, empty, or made up only of whitespace counts as not
 * configured; in that case a warning is written with `console.warn`.
 *
 * @returns `true` when a non-blank API key is present, otherwise `false`.
 */
export function validateFirecrawlConfig(): boolean {
  const apiKey: string | undefined = webContentAnalyzerConfig.firecrawl.apiKey;

  // A blank value (e.g. `FIRECRAWL_API_KEY=" "`) is as unusable as a missing one.
  if (!apiKey || apiKey.trim() === '') {
    console.warn(
      'FIRECRAWL_API_KEY is not configured. Web content analysis features will not work.'
    );
    return false;
  }

  return true;
}
|
|
|
|
/**
 * Validate if the web content analyzer is properly configured.
 *
 * Checks that `creditsCost`, `maxContentLength` and `timeoutMillis` are each a
 * finite number greater than zero. Other settings are not inspected.
 *
 * @returns `true` when all three values pass the check, otherwise `false`.
 */
export function validateWebContentAnalyzerConfig(): boolean {
  // `NaN` fails the `> 0` comparison; `Infinity` is rejected explicitly because
  // an unbounded cost, length, or timeout is not a usable setting.
  const isPositiveFinite = (value: unknown): boolean =>
    typeof value === 'number' && Number.isFinite(value) && value > 0;

  const { creditsCost, maxContentLength, timeoutMillis } =
    webContentAnalyzerConfig;

  return [creditsCost, maxContentLength, timeoutMillis].every(isPositiveFinite);
}
|