Introduction
StateSet Knowledge Base enables your AI agents to access and utilize structured information effectively. By creating comprehensive knowledge bases, your agents can provide accurate, consistent, and contextual responses based on your organization’s specific information, policies, and procedures.
What is a Knowledge Base?
A Knowledge Base in StateSet is a collection of structured documents, FAQs, policies, and procedures that AI agents can search and reference when responding to queries. It serves as the “brain” of your AI system, ensuring responses are grounded in your specific business context.
Key Benefits
Accuracy: Reduce hallucinations with fact-based responses.
Consistency: Ensure uniform information across all interactions.
Scalability: Update once, apply everywhere instantly.
Prerequisites
Before creating a knowledge base:
StateSet account with API access
Documents in supported formats (TXT, PDF, MD, DOCX, JSON)
Basic understanding of vector embeddings (helpful but not required)
Getting Started
Install the SDK
npm install stateset-node
Initialize the Client
// Pull the StateSetClient constructor out of the stateset-node SDK package.
const { StateSetClient } = require ( 'stateset-node' );
// Module-level client used by the examples that follow; the API key is read
// from the STATESET_API_KEY environment variable rather than hard-coded.
const client = new StateSetClient ({
apiKey: process . env . STATESET_API_KEY
});
Create a Knowledge Base
/**
 * Creates the "Customer Support KB" knowledge base with a fixed embedding
 * model, chunking configuration and metadata field list.
 *
 * @returns {Promise<*>} Whatever `client.knowledgeBase.create` resolves with.
 * @throws Rethrows the creation error unchanged after logging it.
 */
async function createKnowledgeBase() {
  let created;
  try {
    const settings = {
      embedding_model: 'text-embedding-ada-002',
      chunk_size: 1000,
      chunk_overlap: 200,
      metadata_fields: ['category', 'product', 'last_updated'],
    };
    const definition = {
      name: 'Customer Support KB',
      description: 'Comprehensive knowledge base for customer support',
      settings,
    };
    created = await client.knowledgeBase.create(definition);
  } catch (creationError) {
    // Log for diagnostics, then let the caller decide how to recover.
    logger.error('Failed to create knowledge base:', creationError);
    throw creationError;
  }
  return created;
}
Adding Content
Document Upload
Upload documents to populate your knowledge base:
/**
 * Uploads one file to a knowledge base as multipart form data and waits for
 * it to finish processing.
 *
 * The form carries two parts: `file` (a read stream for `filePath`) and
 * `metadata` (a JSON string with `category`, `source`, `version` and `tags`,
 * each falling back to a default when the supplied value is falsy).
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {string} filePath - Path handed to `fs.createReadStream`.
 * @param {object} [metadata={}] - Optional overrides for the metadata part.
 * @returns {Promise<{success: boolean, documentId: *, chunks: *}>} Upload summary;
 *   `chunks` is the `chunk_count` reported by `waitForProcessing`.
 * @throws {Error} 'Document format not supported' when the failure has code
 *   'UNSUPPORTED_FORMAT'; any other failure is rethrown unchanged.
 */
async function uploadDocument(kbId, filePath, metadata = {}) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));

  const descriptor = {
    category: metadata.category || 'general',
    source: metadata.source || 'manual_upload',
    version: metadata.version || '1.0',
    tags: metadata.tags || [],
  };
  formData.append('metadata', JSON.stringify(descriptor));

  try {
    const uploaded = await client.knowledgeBase.uploadDocument(kbId, formData);
    // Processing is asynchronous on the service side; block until it settles.
    const outcome = await waitForProcessing(uploaded.id);
    return {
      success: true,
      documentId: uploaded.id,
      chunks: outcome.chunk_count,
    };
  } catch (uploadError) {
    const isUnsupported = uploadError.code === 'UNSUPPORTED_FORMAT';
    throw isUnsupported ? new Error('Document format not supported') : uploadError;
  }
}
/**
 * Polls `client.documents.getStatus` until the document reaches a terminal
 * state or the attempt budget is used up.
 *
 * @param {*} documentId - Identifier forwarded to `getStatus`.
 * @param {number} [maxAttempts=30] - Maximum number of status checks.
 * @param {number} [intervalMs=2000] - Delay between consecutive checks, in milliseconds.
 * @returns {Promise<object>} The status object whose `state` is 'completed'.
 * @throws {Error} 'Processing failed: …' when `state` is 'failed', or
 *   'Processing timeout' when no terminal state was seen within `maxAttempts`.
 */
async function waitForProcessing(documentId, maxAttempts = 30, intervalMs = 2000) {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const status = await client.documents.getStatus(documentId);

    if (status.state === 'completed') {
      return status;
    }
    if (status.state === 'failed') {
      throw new Error(`Processing failed: ${status.error} `);
    }

    // Only pause when another check will follow; sleeping after the final
    // attempt would just delay the timeout error.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
  }
  throw new Error('Processing timeout');
}
Structured Data Import
Import structured data like FAQs or product information:
/**
 * Converts FAQs and policies into knowledge base entries and imports them in
 * batches of 100 via `client.knowledgeBase.importBatch`.
 *
 * Import is best-effort: a batch that throws is logged and counted as failed,
 * and the remaining batches are still attempted.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {{faqs?: Array<object>, policies?: Array<object>}} data - Source records.
 *   FAQs contribute `question`, `answer`, `category` and optional `keywords`;
 *   policies contribute `content`, `name`, `effective_date` and `department`.
 * @returns {Promise<{totalEntries: number, successfulBatches: number, failedBatches: number}>}
 *   Counts of entries built and of batches by outcome. A batch is successful
 *   only when the call resolves with a truthy `success`; batches that throw
 *   are included in `failedBatches`.
 */
async function importStructuredData(kbId, data) {
  const entries = [];

  for (const faq of data.faqs || []) {
    entries.push({
      content: `Q: ${faq.question} \n A: ${faq.answer} `,
      metadata: {
        type: 'faq',
        category: faq.category,
        keywords: faq.keywords || [],
      },
    });
  }

  for (const policy of data.policies || []) {
    entries.push({
      content: policy.content,
      metadata: {
        type: 'policy',
        name: policy.name,
        effective_date: policy.effective_date,
        department: policy.department,
      },
    });
  }

  const batchSize = 100;
  let successfulBatches = 0;
  let failedBatches = 0;

  for (let i = 0; i < entries.length; i += batchSize) {
    const batch = entries.slice(i, i + batchSize);
    let result;
    try {
      result = await client.knowledgeBase.importBatch(kbId, {
        entries: batch,
        update_existing: true,
      });
    } catch (error) {
      logger.error(`Batch import failed at index ${i} :`, error);
      // A thrown batch is still a failed batch; count it, then continue.
      failedBatches += 1;
      continue;
    }

    if (result?.success) {
      successfulBatches += 1;
    } else {
      failedBatches += 1;
    }
  }

  return {
    totalEntries: entries.length,
    successfulBatches,
    failedBatches,
  };
}
Web Scraping Integration
Automatically import content from websites:
/**
 * Creates one scrape job per URL (one at a time), then hands the jobs that
 * were created to `monitorScrapeJobs` and summarises the outcome.
 *
 * A URL whose job cannot be created is logged and skipped; it does not abort
 * the remaining URLs.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {Iterable<string>} urls - Start URLs to scrape.
 * @returns {Promise<{total: number, completed: number, pages_scraped: number}>}
 *   `total` is the number of jobs created, `completed` the number of jobs
 *   returned by `monitorScrapeJobs`, and `pages_scraped` the sum of their
 *   `pages_processed` values.
 */
async function scrapeAndImport(kbId, urls) {
  // Build a fresh request per URL so no options object is shared between jobs.
  const buildRequest = (url) => ({
    url,
    options: {
      follow_links: true,
      max_depth: 2,
      include_patterns: ['/docs/', '/help/'],
      exclude_patterns: ['/api/', '.pdf'],
      extract_metadata: true,
    },
  });

  const scrapeJobs = [];
  for (const target of urls) {
    let job;
    try {
      job = await client.knowledgeBase.createScrapeJob(kbId, buildRequest(target));
    } catch (jobError) {
      logger.error(`Failed to create scrape job for ${target} :`, jobError);
      continue;
    }
    scrapeJobs.push(job);
  }

  const finished = await monitorScrapeJobs(scrapeJobs);
  const addPages = (runningTotal, job) => runningTotal + job.pages_processed;

  return {
    total: scrapeJobs.length,
    completed: finished.length,
    pages_scraped: finished.reduce(addPages, 0),
  };
}
Querying the Knowledge Base
Basic Search
/**
 * Runs a reranked search against a knowledge base and reshapes each hit.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {string} query - Search text.
 * @param {object} [options={}] - Optional overrides.
 * @param {number} [options.limit=5] - Maximum number of hits; falsy values use the default.
 * @param {number} [options.threshold=0.7] - Threshold forwarded to the search
 *   call. An explicit 0 is forwarded as 0; only null/undefined use the default.
 * @param {object} [options.filters={}] - Filters forwarded to the search call.
 * @returns {Promise<Array<{content: *, relevance: *, source: *, snippet: *}>>}
 *   One entry per hit; `source` is undefined for hits without metadata.
 *   Resolves to an empty array (after logging) when the search fails.
 */
async function searchKnowledgeBase(kbId, query, options = {}) {
  try {
    const results = await client.knowledgeBase.search(kbId, {
      query,
      limit: options.limit || 5,
      // `??` rather than `||`: a threshold of 0 is a legitimate caller choice.
      threshold: options.threshold ?? 0.7,
      filters: options.filters || {},
      include_metadata: true,
      rerank: true,
    });

    return results.map((result) => ({
      content: result.content,
      relevance: result.score,
      // One hit lacking metadata must not discard every other hit.
      source: result.metadata?.source,
      snippet: highlightRelevantText(result.content, query),
    }));
  } catch (error) {
    logger.error('Knowledge base search failed:', error);
    return [];
  }
}
/**
 * Picks the sentence of `content` that contains the most query words.
 *
 * Sentences are obtained by splitting on '. '. Matching is a case-insensitive
 * substring test per query word. The query is split on any run of whitespace,
 * so tab- or newline-separated terms count as separate words. On a tie the
 * earliest sentence wins; with no match at all the first sentence is returned.
 *
 * @param {string} content - Text to extract a snippet from.
 * @param {string} query - Search text whose words are looked up in each sentence.
 * @returns {string} The best-matching sentence (without its trailing '. ').
 */
function highlightRelevantText(content, query) {
  const sentences = content.split('. ');
  // Empty tokens (leading/trailing/repeated whitespace) would match every
  // sentence, so drop them.
  const queryWords = query.toLowerCase().split(/\s+/).filter(Boolean);

  let bestSentence = sentences[0];
  let maxMatches = 0;

  for (const sentence of sentences) {
    // Lower-case once per sentence rather than once per query word.
    const haystack = sentence.toLowerCase();
    const matches = queryWords.filter((word) => haystack.includes(word)).length;
    if (matches > maxMatches) {
      maxMatches = matches;
      bestSentence = sentence;
    }
  }

  return bestSentence;
}
Advanced Filtering
/**
 * Runs a filtered, sorted search with category and tag aggregations.
 *
 * Results are sorted by `last_updated`, descending. Filter values are taken
 * from `params` as-is; a field missing from `params` is forwarded as undefined.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {object} params - Search inputs: `query`, `category`, `startDate`,
 *   `endDate`, `tags`, `department` and `minVersion`.
 * @returns {Promise<{results: *, facets: *, total: *}>} The response's `hits`,
 *   `aggregations` and `total` under the names shown.
 */
async function advancedSearch(kbId, params) {
  const filters = {
    category: params.category,
    date_range: { start: params.startDate, end: params.endDate },
    tags: { $in: params.tags },
    metadata: {
      department: params.department,
      version: { $gte: params.minVersion },
    },
  };
  const sort = { field: 'last_updated', order: 'desc' };
  const aggregations = {
    categories: { terms: { field: 'category' } },
    tags: { terms: { field: 'tags' } },
  };

  const response = await client.knowledgeBase.search(kbId, {
    query: params.query,
    filters,
    sort,
    aggregations,
  });

  return {
    results: response.hits,
    facets: response.aggregations,
    total: response.total,
  };
}
Integration with Agents
Automatic Knowledge Base Access
Configure agents to automatically query knowledge bases:
/**
 * Creates the "Support Expert" agent with a single knowledge base tool bound
 * to `kbId`.
 *
 * @param {*} kbId - Identifier placed in the tool's `knowledge_base_id`.
 * @returns {Promise<*>} Whatever `client.agents.create` resolves with.
 */
async function createKnowledgeableAgent(kbId) {
  const knowledgeBaseTool = {
    type: 'knowledge_base',
    knowledge_base_id: kbId,
    settings: {
      always_search: true,
      min_confidence: 0.8,
      max_results: 3,
      cite_sources: true,
    },
  };
  const behaviors = {
    on_low_confidence: 'escalate_to_human',
    on_no_results: 'acknowledge_limitation',
  };

  return await client.agents.create({
    name: 'Support Expert',
    instructions: `You are a helpful support agent. Always search the knowledge base before responding to ensure accuracy.`,
    tools: [knowledgeBaseTool],
    behaviors,
  });
}
Dynamic Context Injection
/**
 * Answers a user query with an agent after injecting knowledge base context.
 *
 * The knowledge base is searched first via `searchKnowledgeBase`; the hit
 * contents are joined into a single context string and sent to the agent
 * along with the list of hit sources.
 *
 * @param {*} agentId - Agent identifier, forwarded to `client.agents.query`.
 * @param {string} userQuery - The user's question.
 * @param {*} kbId - Knowledge base to search for context. Required: the
 *   function has no other way to know which knowledge base to use.
 * @returns {Promise<{answer: *, sources: *, confidence: *}>} The agent's
 *   `text`, `metadata.sources_used` and `confidence`.
 * @throws {TypeError} When `kbId` is null or undefined.
 */
async function enhanceAgentResponse(agentId, userQuery, kbId) {
  if (kbId == null) {
    throw new TypeError('enhanceAgentResponse requires a kbId to search for context');
  }

  // Search knowledge base first
  const kbResults = await searchKnowledgeBase(kbId, userQuery);

  // Create enhanced prompt with context
  const enhancedPrompt = {
    query: userQuery,
    context: kbResults.map((r) => r.content).join(' \n\n '),
    instructions: 'Use the provided context to answer accurately. Cite sources when possible.',
    metadata: {
      sources: kbResults.map((r) => r.source),
    },
  };

  // Get agent response with context
  const response = await client.agents.query(agentId, enhancedPrompt);

  return {
    answer: response.text,
    sources: response.metadata.sources_used,
    confidence: response.confidence,
  };
}
Managing Updates
Version Control
/**
 * Replaces a document's content, bumps its version metadata, and requests an
 * asynchronous reindex of that document.
 *
 * The new metadata starts from the current document's metadata, then
 * overrides `version` (via `incrementVersion`), `previous_version` (the
 * current document's `id`), `updated_at` (current time, ISO 8601) and
 * `updated_by` ('system'). The update is sent with `archive_previous: true`.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {*} documentId - Document to update and reindex.
 * @param {*} newContent - Replacement content.
 * @returns {Promise<*>} Whatever `client.knowledgeBase.updateDocument` resolves with.
 */
async function updateDocumentWithVersioning(kbId, documentId, newContent) {
  const existing = await client.knowledgeBase.getDocument(kbId, documentId);

  const nextMetadata = {
    ...existing.metadata,
    version: incrementVersion(existing.metadata.version),
    previous_version: existing.id,
    updated_at: new Date().toISOString(),
    updated_by: 'system',
  };
  const revision = {
    content: newContent,
    metadata: nextMetadata,
    archive_previous: true,
  };
  const revised = await client.knowledgeBase.updateDocument(kbId, documentId, revision);

  // Reindex only the touched document.
  const reindexRequest = { document_ids: [documentId], async: true };
  await client.knowledgeBase.reindex(kbId, reindexRequest);

  return revised;
}
Bulk Updates
/**
 * Sends a list of document changes to `client.knowledgeBase.bulkUpdate` as
 * one request with `atomic` and `validate` both enabled.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {Array<object>} updates - Changes to apply. Each contributes `id`,
 *   `content`, `metadata` and an optional `operation` (default 'upsert').
 * @returns {Promise<{successful: number, failed: number, details: *}>} Counts
 *   of successful and failed operations plus the response's `failed` list.
 */
async function bulkUpdateKnowledgeBase(kbId, updates) {
  const toOperation = (update) => ({
    operation: update.operation || 'upsert',
    document_id: update.id,
    content: update.content,
    metadata: update.metadata,
  });
  const operations = updates.map((update) => toOperation(update));

  const outcome = await client.knowledgeBase.bulkUpdate(kbId, {
    operations,
    atomic: true, // All or nothing
    validate: true, // Pre-validate all operations
  });

  const failures = outcome.failed;
  if (failures.length > 0) {
    logger.warn('Some updates failed:', failures);
  }

  return {
    successful: outcome.successful.length,
    failed: failures.length,
    details: failures,
  };
}
Caching Strategies
/**
 * In-memory cache with a time-to-live, placed in front of
 * `client.knowledgeBase.search` for a single knowledge base.
 *
 * Entries are keyed by the query text plus the JSON form of the options, so
 * the same options written with a different key order are cached separately.
 */
class KnowledgeBaseCache {
  /**
   * @param {*} kbId - Knowledge base every search is issued against.
   * @param {number} [ttl=3600] - Entry lifetime in seconds.
   */
  constructor(kbId, ttl = 3600) {
    this.kbId = kbId;
    this.ttl = ttl;
    this.cache = new Map();
  }

  /**
   * Returns cached results for (query, options) while they are still fresh;
   * otherwise performs the search, stores the results, and removes expired
   * entries.
   *
   * @param {string} query - Search text.
   * @param {object} [options] - Extra search options spread into the request.
   * @returns {Promise<*>} The cached or freshly fetched search results.
   */
  async search(query, options) {
    const key = this.getCacheKey(query, options);
    const hit = this.cache.get(key);
    const isFresh = Boolean(hit) && hit.expires > Date.now();
    if (isFresh) {
      return hit.results;
    }

    const results = await client.knowledgeBase.search(this.kbId, { query, ...options });

    // ttl is in seconds; Date.now() is in milliseconds.
    const expires = Date.now() + this.ttl * 1000;
    this.cache.set(key, { results, expires });
    this.cleanCache();

    return results;
  }

  /**
   * Builds the cache key for a (query, options) pair.
   *
   * @param {string} query - Search text.
   * @param {object} [options] - Search options, serialized with JSON.stringify.
   * @returns {string} The cache key.
   */
  getCacheKey(query, options) {
    return ` ${query} _ ${JSON.stringify(options)} `;
  }

  /** Removes every entry whose expiry time is strictly in the past. */
  cleanCache() {
    const now = Date.now();
    this.cache.forEach((entry, key) => {
      if (entry.expires < now) {
        this.cache.delete(key);
      }
    });
  }
}
Indexing Best Practices
/**
 * Reads knowledge base statistics and applies the optimizations they call for.
 *
 * A 'rechunk' step is planned when `avg_chunk_size` exceeds 1500, and a
 * 'deduplicate' step when `duplicate_ratio` exceeds 0.1. Planned steps are
 * applied one after another, in that order.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @returns {Promise<Array<{type: string, params: object}>>} The steps that
 *   were applied; empty when neither threshold is exceeded.
 */
async function optimizeKnowledgeBase(kbId) {
  const stats = await client.knowledgeBase.getStats(kbId);

  const rechunkStep = stats.avg_chunk_size > 1500
    ? [{ type: 'rechunk', params: { chunk_size: 1000, chunk_overlap: 200 } }]
    : [];
  const dedupeStep = stats.duplicate_ratio > 0.1
    ? [{ type: 'deduplicate', params: { similarity_threshold: 0.95 } }]
    : [];
  const plan = [...rechunkStep, ...dedupeStep];

  // Sequential on purpose: each step finishes before the next one starts.
  for (const step of plan) {
    await client.knowledgeBase.optimize(kbId, step);
  }

  return plan;
}
Monitoring & Analytics
Usage Analytics
/**
 * Fetches day-grouped usage analytics for a knowledge base and condenses
 * them into a small set of insights.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {string} [timeframe='30d'] - Reporting window, forwarded as-is.
 * @returns {Promise<{popularQueries: Array, noResultQueries: *, lowSatisfactionTopics: *, recommendations: *}>}
 *   The first ten `top_queries`, the `queries_with_no_results` and
 *   `low_satisfaction_queries` fields of the response, and the output of
 *   `generateRecommendations` for the same response.
 */
async function getKnowledgeBaseAnalytics(kbId, timeframe = '30d') {
  const metrics = [
    'search_volume',
    'unique_queries',
    'avg_results_per_query',
    'click_through_rate',
    'user_satisfaction',
  ];
  const report = await client.knowledgeBase.getAnalytics(kbId, {
    timeframe,
    metrics,
    group_by: 'day',
  });

  const popularQueries = report.top_queries.slice(0, 10);
  return {
    popularQueries,
    noResultQueries: report.queries_with_no_results,
    lowSatisfactionTopics: report.low_satisfaction_queries,
    recommendations: generateRecommendations(report),
  };
}
/**
 * Derives improvement recommendations from analytics figures.
 *
 * A 'content_gap' recommendation (with up to five no-result queries) is
 * emitted when `no_results_rate` exceeds 0.1, and a 'content_quality'
 * recommendation when `avg_results_quality` is below 0.7.
 *
 * @param {object} analytics - Analytics record providing `no_results_rate`,
 *   `avg_results_quality` and `queries_with_no_results`.
 * @returns {Array<object>} Zero, one or two recommendations, content gap first.
 */
function generateRecommendations(analytics) {
  const contentGap = analytics.no_results_rate > 0.1
    ? [{
      type: 'content_gap',
      message: 'Add content for frequently searched topics with no results',
      queries: analytics.queries_with_no_results.slice(0, 5),
    }]
    : [];

  const contentQuality = analytics.avg_results_quality < 0.7
    ? [{
      type: 'content_quality',
      message: 'Review and improve content quality for better relevance',
    }]
    : [];

  return [...contentGap, ...contentQuality];
}
Troubleshooting
Common Issues
Poor Search Results
// Diagnose search quality issues
/**
 * Runs the diagnose call for a test query, asking for embeddings and a
 * scoring explanation, and renames the fields of interest.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @param {string} testQuery - Query to diagnose.
 * @returns {Promise<{embedding_quality: *, scoring_explanation: *, suggestions: *}>}
 *   The response's `embedding_stats`, `score_breakdown` and
 *   `improvement_suggestions` under the names shown.
 */
async function diagnoseSearchQuality(kbId, testQuery) {
  const request = {
    query: testQuery,
    return_embeddings: true,
    explain_scoring: true,
  };
  const report = await client.knowledgeBase.diagnose(kbId, request);

  return {
    embedding_quality: report.embedding_stats,
    scoring_explanation: report.score_breakdown,
    suggestions: report.improvement_suggestions,
  };
}
Slow Query Performance
Reduce chunk size for faster processing
Implement caching for frequent queries
Use filters to narrow search scope
High Memory Usage
Limit concurrent processing jobs
Use streaming for large documents
Implement pagination for bulk operations
Security & Access Control
Implement Access Controls
/**
 * Enables role-based access control and audit logging on a knowledge base.
 *
 * Three roles are configured: admin (read, write, delete, manage), editor
 * (read, write) and viewer (read), with 'read' as the default permission.
 * Audit logging is switched on for both searches and updates.
 *
 * @param {*} kbId - Knowledge base identifier, forwarded to the client.
 * @returns {Promise<void>} Resolves once the settings update has completed.
 */
async function setupAccessControl(kbId) {
  const roles = {
    admin: ['read', 'write', 'delete', 'manage'],
    editor: ['read', 'write'],
    viewer: ['read'],
  };
  const accessControl = {
    enabled: true,
    default_permission: 'read',
    roles,
  };
  const auditLogging = {
    enabled: true,
    log_searches: true,
    log_updates: true,
  };

  await client.knowledgeBase.updateSettings(kbId, {
    access_control: accessControl,
    audit_logging: auditLogging,
  });
}
Next Steps