Introduction

StateSet Knowledge Base enables your AI agents to access and utilize structured information effectively. By creating comprehensive knowledge bases, your agents can provide accurate, consistent, and contextual responses based on your organization’s specific information, policies, and procedures.

What is a Knowledge Base?

A Knowledge Base in StateSet is a collection of structured documents, FAQs, policies, and procedures that AI agents can search and reference when responding to queries. It serves as the “brain” of your AI system, ensuring responses are grounded in your specific business context.

Key Benefits

Accuracy

Reduce hallucinations with fact-based responses

Consistency

Ensure uniform information across all interactions

Scalability

Update once, apply everywhere instantly

Prerequisites

Before creating a knowledge base:

  • StateSet account with API access
  • Documents in supported formats (TXT, PDF, MD, DOCX, JSON)
  • Basic understanding of vector embeddings (helpful but not required)

Getting Started

Step 1: Install the SDK

npm install stateset-node
Step 2: Initialize the Client

// Load the client class from the `stateset-node` package installed in the
// previous step.
const { StateSetClient } = require('stateset-node');

// Build the `client` object that the remaining examples call. The API key is
// read from the STATESET_API_KEY environment variable rather than being
// written into the source.
const client = new StateSetClient({
  apiKey: process.env.STATESET_API_KEY
});
Step 3: Create a Knowledge Base

/**
 * Creates the "Customer Support KB" knowledge base with a fixed embedding
 * model, chunking configuration and metadata field list.
 *
 * @returns {Promise<*>} Whatever `client.knowledgeBase.create` resolves with.
 * @throws Re-throws any failure from the create call after logging it.
 */
async function createKnowledgeBase() {
  try {
    const settings = {
      embedding_model: 'text-embedding-ada-002',
      chunk_size: 1000,
      chunk_overlap: 200,
      metadata_fields: ['category', 'product', 'last_updated']
    };
    const definition = {
      name: 'Customer Support KB',
      description: 'Comprehensive knowledge base for customer support',
      settings
    };

    return await client.knowledgeBase.create(definition);
  } catch (createError) {
    // Log for visibility, then let the caller decide how to recover.
    logger.error('Failed to create knowledge base:', createError);
    throw createError;
  }
}

Adding Content

Document Upload

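Upload documents to populate your knowledge base. The example below assumes that `fs` (Node's file-system module), a `FormData` implementation, and the `client` created in Getting Started are already in scope; `waitForProcessing` is defined right after it: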

/**
 * Uploads one file to a knowledge base and waits until it has been processed.
 *
 * @param {string} kbId - Knowledge base to upload into.
 * @param {string} filePath - Path handed to `fs.createReadStream`.
 * @param {Object} [metadata={}] - Optional `category`, `source`, `version` and
 *   `tags`; missing or falsy values fall back to 'general', 'manual_upload',
 *   '1.0' and [] respectively.
 * @returns {Promise<{success: boolean, documentId: *, chunks: *}>} The id of
 *   the uploaded document and the `chunk_count` reported by
 *   `waitForProcessing`.
 * @throws {Error} 'Document format not supported' when the failure carries the
 *   code 'UNSUPPORTED_FORMAT'; every other error is re-thrown unchanged.
 */
async function uploadDocument(kbId, filePath, metadata = {}) {
  const payload = new FormData();
  payload.append('file', fs.createReadStream(filePath));

  const { category, source, version, tags } = metadata;
  payload.append(
    'metadata',
    JSON.stringify({
      category: category || 'general',
      source: source || 'manual_upload',
      version: version || '1.0',
      tags: tags || []
    })
  );

  try {
    const uploaded = await client.knowledgeBase.uploadDocument(kbId, payload);

    // The upload call returns before processing is done, so poll until the
    // document is ready and its chunk count is known.
    const processed = await waitForProcessing(uploaded.id);

    return {
      success: true,
      documentId: uploaded.id,
      chunks: processed.chunk_count
    };
  } catch (uploadError) {
    if (uploadError.code !== 'UNSUPPORTED_FORMAT') {
      throw uploadError;
    }
    throw new Error('Document format not supported');
  }
}

/**
 * Polls a document's processing status until it completes, fails, or the
 * attempt budget is exhausted.
 *
 * @param {string} documentId - Id passed to `client.documents.getStatus`.
 * @param {number} [maxAttempts=30] - Maximum number of status polls.
 * @param {number} [intervalMs=2000] - Delay between two consecutive polls, in
 *   milliseconds.
 * @returns {Promise<Object>} The status object whose `state` is 'completed'.
 * @throws {Error} 'Processing failed: …' when the status reports 'failed', or
 *   'Processing timeout' when no poll reached a final state.
 */
async function waitForProcessing(documentId, maxAttempts = 30, intervalMs = 2000) {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const status = await client.documents.getStatus(documentId);

    if (status.state === 'completed') {
      return status;
    }
    if (status.state === 'failed') {
      throw new Error(`Processing failed: ${status.error}`);
    }

    // Only pause when another poll will follow; sleeping after the last
    // attempt would just delay the timeout error.
    if (attempt < maxAttempts) {
      await new Promise(resolve => setTimeout(resolve, intervalMs));
    }
  }

  throw new Error('Processing timeout');
}

Structured Data Import

Import structured data like FAQs or product information:

/**
 * Converts FAQs and policies into knowledge base entries and imports them in
 * batches of 100.
 *
 * A failing batch is logged and skipped so that the remaining batches are
 * still attempted. A batch counts as failed when the import call throws or
 * when its result does not report `success`.
 *
 * @param {string} kbId - Knowledge base to import into.
 * @param {Object} data - Source data.
 * @param {Iterable<Object>} [data.faqs] - Items with `question`, `answer`,
 *   `category` and optional `keywords`.
 * @param {Iterable<Object>} [data.policies] - Items with `content`, `name`,
 *   `effective_date` and `department`.
 * @returns {Promise<{totalEntries: number, successfulBatches: number,
 *   failedBatches: number}>} Import summary.
 */
async function importStructuredData(kbId, data) {
  const faqEntries = Array.from(data.faqs || [], (faq) => ({
    content: `Q: ${faq.question}\nA: ${faq.answer}`,
    metadata: {
      type: 'faq',
      category: faq.category,
      keywords: faq.keywords || []
    }
  }));

  const policyEntries = Array.from(data.policies || [], (policy) => ({
    content: policy.content,
    metadata: {
      type: 'policy',
      name: policy.name,
      effective_date: policy.effective_date,
      department: policy.department
    }
  }));

  const entries = [...faqEntries, ...policyEntries];

  const batchSize = 100;
  let successfulBatches = 0;
  let failedBatches = 0;

  for (let i = 0; i < entries.length; i += batchSize) {
    const batch = entries.slice(i, i + batchSize);

    try {
      const result = await client.knowledgeBase.importBatch(kbId, {
        entries: batch,
        update_existing: true
      });

      if (result && result.success) {
        successfulBatches += 1;
      } else {
        failedBatches += 1;
      }
    } catch (error) {
      // A thrown batch must show up in the summary too; otherwise the caller
      // sees fewer failures than actually happened.
      failedBatches += 1;
      logger.error(`Batch import failed at index ${i}:`, error);
      // Continue with next batch
    }
  }

  return {
    totalEntries: entries.length,
    successfulBatches,
    failedBatches
  };
}

Web Scraping Integration

Automatically import content from websites:

/**
 * Creates one scrape job per URL, waits for the jobs via `monitorScrapeJobs`,
 * and summarises the outcome.
 *
 * URLs whose job cannot be created are logged and left out of the totals.
 *
 * @param {string} kbId - Knowledge base the scraped content is imported into.
 * @param {Iterable<string>} urls - Start URLs.
 * @returns {Promise<{total: number, completed: number, pages_scraped: number}>}
 *   Number of jobs created, number reported as completed, and the sum of
 *   their `pages_processed`.
 */
async function scrapeAndImport(kbId, urls) {
  // Build a fresh request (including fresh pattern arrays) for every URL.
  const buildRequest = (url) => ({
    url,
    options: {
      follow_links: true,
      max_depth: 2,
      include_patterns: ['/docs/', '/help/'],
      exclude_patterns: ['/api/', '.pdf'],
      extract_metadata: true
    }
  });

  const scrapeJobs = [];
  for (const url of urls) {
    try {
      scrapeJobs.push(await client.knowledgeBase.createScrapeJob(kbId, buildRequest(url)));
    } catch (jobError) {
      logger.error(`Failed to create scrape job for ${url}:`, jobError);
    }
  }

  const completed = await monitorScrapeJobs(scrapeJobs);
  const addPages = (pageTotal, job) => pageTotal + job.pages_processed;

  return {
    total: scrapeJobs.length,
    completed: completed.length,
    pages_scraped: completed.reduce(addPages, 0)
  };
}

Querying the Knowledge Base

/**
 * Searches a knowledge base and returns simplified result objects.
 *
 * The search is best-effort: any failure is logged and an empty array is
 * returned instead of throwing.
 *
 * @param {string} kbId - Knowledge base to search.
 * @param {string} query - Search text.
 * @param {Object} [options={}] - Optional overrides.
 * @param {number} [options.limit=5] - Maximum number of results.
 * @param {number} [options.threshold=0.7] - Minimum score; an explicit `0` is
 *   honoured and sent as-is.
 * @param {Object} [options.filters={}] - Filters forwarded to the search call.
 * @returns {Promise<Array<{content: *, relevance: *, source: *, snippet: *}>>}
 *   One entry per result, or `[]` when the search failed.
 */
async function searchKnowledgeBase(kbId, query, options = {}) {
  try {
    // `options.threshold || 0.7` would turn an explicit threshold of 0 into
    // 0.7, so 0 is passed through deliberately. Every other falsy value still
    // falls back to the default, exactly as before.
    const threshold = options.threshold === 0 ? 0 : (options.threshold || 0.7);

    const results = await client.knowledgeBase.search(kbId, {
      query,
      limit: options.limit || 5,
      threshold,
      filters: options.filters || {},
      include_metadata: true,
      rerank: true
    });

    return results.map(result => ({
      content: result.content,
      relevance: result.score,
      source: result.metadata.source,
      snippet: highlightRelevantText(result.content, query)
    }));
  } catch (error) {
    logger.error('Knowledge base search failed:', error);
    return [];
  }
}

/**
 * Picks the sentence of `content` that contains the most query words.
 *
 * Sentences are obtained by splitting on '. ' and query words by splitting the
 * lower-cased query on single spaces. Matching is a case-insensitive substring
 * test. The first sentence wins ties and is also returned when nothing
 * matches.
 *
 * @param {string} content - Text to pick a sentence from.
 * @param {string} query - Search text.
 * @returns {string} The best-matching sentence, without its '. ' separator.
 */
function highlightRelevantText(content, query) {
  const sentences = content.split('. ');
  const terms = query.toLowerCase().split(' ');

  const countMatches = (sentence) => {
    const lowered = sentence.toLowerCase();
    return terms.reduce((hits, term) => (lowered.includes(term) ? hits + 1 : hits), 0);
  };

  const initialBest = { sentence: sentences[0], score: 0 };
  const winner = sentences.reduce((best, sentence) => {
    const score = countMatches(sentence);
    // Strictly greater keeps the earliest sentence on ties.
    return score > best.score ? { sentence, score } : best;
  }, initialBest);

  return winner.sentence;
}

Advanced Filtering

/**
 * Runs a filtered, sorted search and asks for category and tag aggregations.
 *
 * @param {string} kbId - Knowledge base to search.
 * @param {Object} params - Search parameters: `query`, `category`,
 *   `startDate`, `endDate`, `tags`, `department` and `minVersion`.
 * @returns {Promise<{results: *, facets: *, total: *}>} The `hits`,
 *   `aggregations` and `total` fields of the search response.
 */
async function advancedSearch(kbId, params) {
  const filters = {
    category: params.category,
    date_range: { start: params.startDate, end: params.endDate },
    tags: { $in: params.tags },
    metadata: {
      department: params.department,
      version: { $gte: params.minVersion }
    }
  };
  const sort = { field: 'last_updated', order: 'desc' };
  const aggregations = {
    categories: { terms: { field: 'category' } },
    tags: { terms: { field: 'tags' } }
  };

  const response = await client.knowledgeBase.search(kbId, {
    query: params.query,
    filters,
    sort,
    aggregations
  });

  return {
    results: response.hits,
    facets: response.aggregations,
    total: response.total
  };
}

Integration with Agents

Automatic Knowledge Base Access

Configure agents to automatically query knowledge bases:

/**
 * Creates the "Support Expert" agent with a knowledge base tool attached.
 *
 * @param {string} kbId - Knowledge base the agent's tool points at.
 * @returns {Promise<*>} Whatever `client.agents.create` resolves with.
 */
async function createKnowledgeableAgent(kbId) {
  const knowledgeBaseTool = {
    type: 'knowledge_base',
    knowledge_base_id: kbId,
    settings: {
      always_search: true,
      min_confidence: 0.8,
      max_results: 3,
      cite_sources: true
    }
  };

  const agentConfig = {
    name: 'Support Expert',
    instructions: `You are a helpful support agent. Always search the knowledge base before responding to ensure accuracy.`,
    tools: [knowledgeBaseTool],
    behaviors: {
      on_low_confidence: 'escalate_to_human',
      on_no_results: 'acknowledge_limitation'
    }
  };

  return client.agents.create(agentConfig);
}

Dynamic Context Injection

/**
 * Answers a user query with an agent, after injecting knowledge base search
 * results as context.
 *
 * @param {string} agentId - Agent that should answer.
 * @param {string} userQuery - The user's question.
 * @param {string} [knowledgeBaseId] - Knowledge base to search. When omitted,
 *   an ambient `kbId` variable is used if one is in scope, which keeps
 *   existing two-argument callers working.
 * @returns {Promise<{answer: *, sources: *, confidence: *}>} The agent's
 *   `text`, `metadata.sources_used` and `confidence`.
 * @throws {TypeError} When no knowledge base id can be determined.
 */
async function enhanceAgentResponse(agentId, userQuery, knowledgeBaseId) {
  // The id used to be read from an undeclared `kbId`, which throws a
  // ReferenceError unless such a variable exists in an outer scope. Prefer the
  // explicit argument and only fall back to the ambient name if it exists.
  let targetKbId = knowledgeBaseId;
  if (targetKbId === undefined && typeof kbId !== 'undefined') {
    targetKbId = kbId;
  }
  if (targetKbId === undefined) {
    throw new TypeError('enhanceAgentResponse requires a knowledge base id');
  }

  // Search knowledge base first
  const kbResults = await searchKnowledgeBase(targetKbId, userQuery);

  // Create enhanced prompt with context
  const enhancedPrompt = {
    query: userQuery,
    context: kbResults.map(r => r.content).join('\n\n'),
    instructions: 'Use the provided context to answer accurately. Cite sources when possible.',
    metadata: {
      sources: kbResults.map(r => r.source)
    }
  };

  // Get agent response with context
  const response = await client.agents.query(agentId, enhancedPrompt);

  return {
    answer: response.text,
    sources: response.metadata.sources_used,
    confidence: response.confidence
  };
}

Managing Updates

Version Control

/**
 * Replaces a document's content, bumps its version metadata, and requests a
 * reindex of that document.
 *
 * @param {string} kbId - Knowledge base that owns the document.
 * @param {string} documentId - Document to update.
 * @param {*} newContent - Replacement content.
 * @returns {Promise<*>} Whatever `client.knowledgeBase.updateDocument`
 *   resolves with.
 */
async function updateDocumentWithVersioning(kbId, documentId, newContent) {
  const existing = await client.knowledgeBase.getDocument(kbId, documentId);

  // Existing metadata is kept; only the versioning fields are overwritten.
  const nextMetadata = {
    ...existing.metadata,
    version: incrementVersion(existing.metadata.version),
    previous_version: existing.id,
    updated_at: new Date().toISOString(),
    updated_by: 'system'
  };

  const updated = await client.knowledgeBase.updateDocument(kbId, documentId, {
    content: newContent,
    metadata: nextMetadata,
    archive_previous: true
  });

  // The reindex request is awaited, but it is issued with `async: true`.
  const reindexRequest = { document_ids: [documentId], async: true };
  await client.knowledgeBase.reindex(kbId, reindexRequest);

  return updated;
}

Bulk Updates

/**
 * Sends a list of document changes to the knowledge base in one bulk request.
 *
 * @param {string} kbId - Knowledge base to update.
 * @param {Array<Object>} updates - Items with `id`, `content`, `metadata` and
 *   an optional `operation` (defaults to 'upsert').
 * @returns {Promise<{successful: number, failed: number, details: *}>} Counts
 *   of successful and failed operations plus the raw list of failures.
 */
async function bulkUpdateKnowledgeBase(kbId, updates) {
  const operations = updates.map(({ operation, id, content, metadata }) => ({
    operation: operation || 'upsert',
    document_id: id,
    content,
    metadata
  }));

  // The request asks for atomic execution and up-front validation.
  const { successful, failed } = await client.knowledgeBase.bulkUpdate(kbId, {
    operations,
    atomic: true,
    validate: true
  });

  const hasFailures = failed.length > 0;
  if (hasFailures) {
    logger.warn('Some updates failed:', failed);
  }

  return {
    successful: successful.length,
    failed: failed.length,
    details: failed
  };
}

Performance Optimization

Caching Strategies

/**
 * In-memory cache in front of `client.knowledgeBase.search` for one knowledge
 * base. Entries expire `ttl` seconds after they are stored.
 */
class KnowledgeBaseCache {
  /**
   * @param {string} kbId - Knowledge base every search is sent to.
   * @param {number} [ttl=3600] - Entry lifetime in seconds.
   */
  constructor(kbId, ttl = 3600) {
    this.kbId = kbId;
    this.ttl = ttl;
    this.cache = new Map();
  }

  /**
   * Returns cached results for the query/options pair when a non-expired entry
   * exists; otherwise performs the search, stores the results, and removes
   * expired entries.
   *
   * @param {string} query - Search text.
   * @param {Object} [options] - Extra search parameters, spread into the
   *   request after `query`.
   * @returns {Promise<*>} The cached or freshly fetched search results.
   */
  async search(query, options) {
    const key = this.getCacheKey(query, options);
    const entry = this.cache.get(key);

    const isFresh = Boolean(entry) && entry.expires > Date.now();
    if (isFresh) {
      return entry.results;
    }

    const request = { query, ...options };
    const results = await client.knowledgeBase.search(this.kbId, request);

    const lifetimeMs = this.ttl * 1000;
    this.cache.set(key, { results, expires: Date.now() + lifetimeMs });

    // Sweep expired entries on every miss.
    this.cleanCache();

    return results;
  }

  /**
   * Builds the cache key from the query and the JSON form of the options.
   *
   * @param {string} query - Search text.
   * @param {Object} [options] - Search options.
   * @returns {string} Cache key.
   */
  getCacheKey(query, options) {
    const serializedOptions = JSON.stringify(options);
    return `${query}_${serializedOptions}`;
  }

  /** Deletes every entry whose expiry time lies in the past. */
  cleanCache() {
    const cutoff = Date.now();
    this.cache.forEach((entry, key) => {
      if (entry.expires < cutoff) {
        this.cache.delete(key);
      }
    });
  }
}

Indexing Best Practices

/**
 * Inspects a knowledge base's statistics and applies the optimizations they
 * call for, one after another.
 *
 * - 'rechunk' when `avg_chunk_size` exceeds 1500
 * - 'deduplicate' when `duplicate_ratio` exceeds 0.1
 *
 * @param {string} kbId - Knowledge base to optimize.
 * @returns {Promise<Array<{type: string, params: Object}>>} The optimizations
 *   that were applied, in application order.
 */
async function optimizeKnowledgeBase(kbId) {
  const {
    avg_chunk_size: avgChunkSize,
    duplicate_ratio: duplicateRatio
  } = await client.knowledgeBase.getStats(kbId);

  // Each candidate pairs its trigger condition with the step to run.
  const candidates = [
    [
      avgChunkSize > 1500,
      { type: 'rechunk', params: { chunk_size: 1000, chunk_overlap: 200 } }
    ],
    [
      duplicateRatio > 0.1,
      { type: 'deduplicate', params: { similarity_threshold: 0.95 } }
    ]
  ];

  const optimizations = candidates
    .filter(([isNeeded]) => isNeeded)
    .map(([, step]) => step);

  for (const step of optimizations) {
    await client.knowledgeBase.optimize(kbId, step);
  }

  return optimizations;
}

Monitoring & Analytics

Usage Analytics

/**
 * Fetches daily usage analytics for a knowledge base and condenses them into
 * a set of insights.
 *
 * @param {string} kbId - Knowledge base to analyse.
 * @param {string} [timeframe='30d'] - Period forwarded to the analytics call.
 * @returns {Promise<{popularQueries: *, noResultQueries: *,
 *   lowSatisfactionTopics: *, recommendations: *}>} The first ten
 *   `top_queries`, the no-result and low-satisfaction query lists, and the
 *   output of `generateRecommendations`.
 */
async function getKnowledgeBaseAnalytics(kbId, timeframe = '30d') {
  const metrics = [
    'search_volume',
    'unique_queries',
    'avg_results_per_query',
    'click_through_rate',
    'user_satisfaction'
  ];

  const report = await client.knowledgeBase.getAnalytics(kbId, {
    timeframe,
    metrics,
    group_by: 'day'
  });

  return {
    popularQueries: report.top_queries.slice(0, 10),
    noResultQueries: report.queries_with_no_results,
    lowSatisfactionTopics: report.low_satisfaction_queries,
    recommendations: generateRecommendations(report)
  };
}

/**
 * Derives improvement recommendations from analytics figures.
 *
 * - 'content_gap' when `no_results_rate` is above 0.1; it carries the first
 *   five `queries_with_no_results`.
 * - 'content_quality' when `avg_results_quality` is below 0.7.
 *
 * @param {Object} analytics - Analytics figures to inspect.
 * @returns {Array<Object>} Zero, one or two recommendations, with the content
 *   gap listed first.
 */
function generateRecommendations(analytics) {
  const hasContentGap = analytics.no_results_rate > 0.1;
  const hasQualityIssue = analytics.avg_results_quality < 0.7;

  const contentGap = hasContentGap
    ? [{
      type: 'content_gap',
      message: 'Add content for frequently searched topics with no results',
      queries: analytics.queries_with_no_results.slice(0, 5)
    }]
    : [];

  const contentQuality = hasQualityIssue
    ? [{
      type: 'content_quality',
      message: 'Review and improve content quality for better relevance'
    }]
    : [];

  return [...contentGap, ...contentQuality];
}

Troubleshooting

Common Issues

  1. Poor Search Results

    /**
     * Runs the knowledge base diagnostics for a test query, requesting
     * embeddings and a scoring explanation, and returns the parts relevant for
     * judging search quality.
     *
     * @param {string} kbId - Knowledge base to diagnose.
     * @param {string} testQuery - Query to diagnose.
     * @returns {Promise<{embedding_quality: *, scoring_explanation: *,
     *   suggestions: *}>} The `embedding_stats`, `score_breakdown` and
     *   `improvement_suggestions` fields of the diagnostics response.
     */
    async function diagnoseSearchQuality(kbId, testQuery) {
      const request = {
        query: testQuery,
        return_embeddings: true,
        explain_scoring: true
      };

      const {
        embedding_stats: embeddingStats,
        score_breakdown: scoreBreakdown,
        improvement_suggestions: improvementSuggestions
      } = await client.knowledgeBase.diagnose(kbId, request);

      return {
        embedding_quality: embeddingStats,
        scoring_explanation: scoreBreakdown,
        suggestions: improvementSuggestions
      };
    }
    
  2. Slow Query Performance

    • Reduce chunk size for faster processing
    • Implement caching for frequent queries
    • Use filters to narrow search scope
  3. High Memory Usage

    • Limit concurrent processing jobs
    • Use streaming for large documents
    • Implement pagination for bulk operations

Security & Access Control

Implement Access Controls

/**
 * Enables role-based access control and audit logging on a knowledge base.
 *
 * The settings define three roles (admin, editor, viewer), make 'read' the
 * default permission, and switch on logging of searches and updates.
 *
 * @param {string} kbId - Knowledge base to configure.
 * @returns {Promise<void>} Resolves once the settings update has finished.
 */
async function setupAccessControl(kbId) {
  const roles = {
    admin: ['read', 'write', 'delete', 'manage'],
    editor: ['read', 'write'],
    viewer: ['read']
  };

  const accessControl = {
    enabled: true,
    default_permission: 'read',
    roles
  };

  const auditLogging = {
    enabled: true,
    log_searches: true,
    log_updates: true
  };

  await client.knowledgeBase.updateSettings(kbId, {
    access_control: accessControl,
    audit_logging: auditLogging
  });
}

Next Steps