Skip to main content

Python Examples

This page provides comprehensive Python examples for integrating with the Harvest Context API. All examples use the requests library and demonstrate best practices for authentication, error handling, and context retrieval.

Installation

pip install requests

Basic Setup

import os

# Configuration
BASE_URL = "https://www.goharvest.ai/api/v1/developer"

# The key is read from the environment so it never has to appear in source.
API_KEY = os.environ.get("HARVEST_API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("HARVEST_API_KEY environment variable is required")

# Request headers carrying the key and the JSON content type.
HEADERS = dict([
    ("API-Key", API_KEY),
    ("Content-Type", "application/json"),
])

Complete Context Retrieval Workflow

This example demonstrates the full workflow: listing available indexes, retrieving context from a specific service, looking a service up by name, and an optional interactive mode.
import requests
import os

class HarvestContextClient:
    """Small client for the Harvest Context API developer endpoints.

    Wraps the ``list-indexes`` and ``get-context`` endpoints. Failures are
    reported with ``print`` and signalled through the return value (``[]``
    or ``None``) instead of being raised.
    """

    def __init__(self, api_key, timeout=30):
        """Store the credentials and request settings.

        Args:
            api_key: Value sent in the ``API-Key`` header of every request.
            timeout: Seconds to wait for the server before giving up. A
                request made without a timeout can block indefinitely.
        """
        self.api_key = api_key
        self.base_url = "https://www.goharvest.ai/api/v1/developer"
        self.headers = {
            "API-Key": api_key,
            "Content-Type": "application/json"
        }
        self.timeout = timeout

    def _fetch(self, method, endpoint, field, action, payload=None):
        """Call one endpoint and return ``data[field]``, or None on failure.

        Args:
            method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
            endpoint: Path segment appended to the base URL.
            field: Key expected inside the response's ``data`` object.
            action: Short phrase used in the transport error message.
            payload: Optional JSON body.
        """
        url = f"{self.base_url}/{endpoint}"

        try:
            response = requests.request(
                method,
                url,
                headers=self.headers,
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()
        # ValueError covers a non-JSON body on requests versions where the
        # JSON decode error is a plain ValueError.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"❌ Error {action}: {e}")
            return None

        if not isinstance(result, dict):
            print("❌ Error: unexpected response format")
            return None

        if not result.get('ok'):
            print(f"❌ Error: {result.get('error')}")
            return None

        data = result.get('data')
        value = data.get(field) if isinstance(data, dict) else None
        if value is None:
            # A "successful" response without the expected field is treated
            # as a failure instead of raising KeyError.
            print(f"❌ Error: response is missing '{field}'")
            return None
        return value

    def list_indexes(self):
        """List all available indexed services.

        Returns:
            The list of index entries from the response, or ``[]`` when the
            request failed or the response was malformed.
        """
        indexes = self._fetch("GET", "list-indexes", "indexes", "listing indexes")
        if indexes is None:
            return []
        if not isinstance(indexes, list):
            print("❌ Error: unexpected response format")
            return []

        print(f"✅ Found {len(indexes)} indexed services")
        return indexes

    def get_context(self, index_id, query):
        """Retrieve context from a specific indexed service.

        Args:
            index_id: Identifier of the index to query.
            query: Query text sent to the service.

        Returns:
            The context from the response, or ``None`` when the request
            failed or the response was malformed.
        """
        payload = {
            "query": query,
            "index_id": index_id
        }
        context = self._fetch(
            "POST", "get-context", "context", "getting context", payload=payload
        )
        if context is None:
            return None

        print(f"✅ Retrieved context ({len(context)} characters)")
        return context

    def search_by_service_name(self, service_name, query):
        """Find a service by name and retrieve context from it.

        The first index whose name contains ``service_name``
        (case-insensitive) is used.

        Returns:
            The result of ``get_context`` for the matched service, or
            ``None`` when no service name matches.
        """
        indexes = self.list_indexes()

        # Find matching service (case-insensitive)
        wanted = service_name.lower()
        matching_service = next(
            (idx for idx in indexes if wanted in idx['name'].lower()),
            None,
        )

        if matching_service is None:
            print(f"❌ Service '{service_name}' not found")
            print(f"Available services: {[idx['name'] for idx in indexes]}")
            return None

        print(f"🔍 Found service: {matching_service['name']} ({matching_service['type']})")
        return self.get_context(matching_service['index_id'], query)

    def interactive_query(self):
        """Interactive mode for exploring indexed services.

        Lists the services, asks for a 1-based selection and a query on
        stdin, then prints the retrieved context.
        """
        indexes = self.list_indexes()

        if not indexes:
            print("❌ No indexes available")
            return

        print("\n📚 Available Services:")
        for i, idx in enumerate(indexes, 1):
            print(f"  {i}. {idx['name']} ({idx['type']})")

        try:
            choice = int(input("\nSelect a service (number): ")) - 1
            if not 0 <= choice < len(indexes):
                print("❌ Invalid selection")
                return

            selected = indexes[choice]
            query = input("Enter your query: ")

            print(f"\n🔍 Querying {selected['name']}...")
            context = self.get_context(selected['index_id'], query)

            if context:
                print("\n" + "="*80)
                print("CONTEXT:")
                print("="*80)
                print(context)
                print("="*80)
        # EOFError: input() hit end-of-file (closed or piped stdin).
        except (ValueError, KeyboardInterrupt, EOFError):
            print("\n👋 Cancelled")

# Usage examples
def _preview(text, limit=500):
    """Return *text* cut to *limit* characters, with "..." appended when cut."""
    return text[:limit] + "..." if len(text) > limit else text


def main():
    """Run the usage examples with the key from HARVEST_API_KEY.

    Raises:
        ValueError: If the HARVEST_API_KEY environment variable is unset
            or empty.
    """
    api_key = os.getenv("HARVEST_API_KEY")
    if not api_key:
        # Fail fast instead of building a client with no key.
        raise ValueError("HARVEST_API_KEY environment variable is required")

    client = HarvestContextClient(api_key)

    # Example 1: List all available services
    print("Example 1: Listing all services\n")
    indexes = client.list_indexes()
    for idx in indexes:
        print(f"  - {idx['name']} ({idx['type']}): {idx['index_id']}")

    # Example 2: Get context from first available service
    if indexes:
        print("\nExample 2: Getting context\n")
        first_index = indexes[0]
        context = client.get_context(
            first_index['index_id'],
            "how to make authenticated POST request"
        )
        if context:
            print(_preview(context))

    # Example 3: Search by service name
    print("\nExample 3: Search by service name\n")
    context = client.search_by_service_name(
        "Python",
        "how to handle exceptions"
    )
    if context:
        # Show the result, as Example 2 does, instead of discarding it.
        print(_preview(context))

    # Example 4: Interactive mode (uncomment to use)
    # client.interactive_query()

if __name__ == "__main__":
    main()

Advanced: Caching Context Results

import requests
import json
import hashlib
from datetime import datetime, timedelta
from pathlib import Path

class CachedHarvestClient:
    """Harvest Context API client that caches ``get-context`` results on disk.

    Each result is stored as a JSON file named after an MD5 digest of the
    index id and query, and is reused until it is older than the TTL.
    """

    def __init__(self, api_key, cache_dir=".harvest_cache", cache_ttl_hours=24,
                 timeout=30):
        """Store the credentials and prepare the cache directory.

        Args:
            api_key: Value sent in the ``API-Key`` header of every request.
            cache_dir: Directory holding the cache files; created if missing.
            cache_ttl_hours: Maximum age of a cache entry, in hours.
            timeout: Seconds to wait for the server before giving up. A
                request made without a timeout can block indefinitely.
        """
        self.api_key = api_key
        self.base_url = "https://www.goharvest.ai/api/v1/developer"
        self.headers = {
            "API-Key": api_key,
            "Content-Type": "application/json"
        }
        self.cache_dir = Path(cache_dir)
        # parents=True so a nested cache_dir can be created in one step.
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache_ttl = timedelta(hours=cache_ttl_hours)
        self.timeout = timeout

    def _get_cache_key(self, index_id, query):
        """Generate the cache key (hex MD5 digest) from index_id and query."""
        content = f"{index_id}:{query}"
        # MD5 only names the cache file here; it is not a security measure.
        return hashlib.md5(content.encode()).hexdigest()

    def _get_cache_path(self, cache_key):
        """Return the path of the cache file for ``cache_key``."""
        return self.cache_dir / f"{cache_key}.json"

    def _read_cache(self, cache_key):
        """Return the cached context, or None if missing, expired or unreadable."""
        cache_path = self._get_cache_path(cache_key)

        if not cache_path.exists():
            return None

        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                cached = json.load(f)

            cached_time = datetime.fromisoformat(cached['timestamp'])
            context = cached['context']
            # Computed once so the printed age matches the age that was tested.
            age = datetime.now() - cached_time
        except (OSError, ValueError, KeyError, TypeError) as e:
            # A corrupt or unexpected cache file counts as a miss.
            print(f"⚠️  Cache read error: {e}")
            return None

        if age < self.cache_ttl:
            print(f"✅ Cache hit (age: {age})")
            return context

        print("⚠️  Cache expired")
        return None

    def _write_cache(self, cache_key, context):
        """Write ``context`` to the cache; failures are reported, not raised."""
        cache_path = self._get_cache_path(cache_key)

        try:
            with open(cache_path, 'w', encoding='utf-8') as f:
                json.dump({
                    'timestamp': datetime.now().isoformat(),
                    'context': context
                }, f)
            print("💾 Cached result")
        except (OSError, TypeError, ValueError) as e:
            print(f"⚠️  Cache write error: {e}")

    def get_context(self, index_id, query, use_cache=True):
        """Get context for ``query``, serving from the cache when possible.

        Args:
            index_id: Identifier of the index to query.
            query: Query text sent to the service.
            use_cache: When False, skip the cache lookup (the fresh result
                is still written to the cache).

        Returns:
            The context, or ``None`` when the request failed or the
            response was malformed.
        """
        cache_key = self._get_cache_key(index_id, query)

        # Try cache first
        if use_cache:
            cached_context = self._read_cache(cache_key)
            # Compare with None: an empty string is a valid cached result.
            if cached_context is not None:
                return cached_context

        # Fetch from API
        url = f"{self.base_url}/get-context"
        payload = {"query": query, "index_id": index_id}

        try:
            response = requests.post(
                url, headers=self.headers, json=payload, timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
        # ValueError covers a non-JSON body on requests versions where the
        # JSON decode error is a plain ValueError.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"❌ Error: {e}")
            return None

        if not isinstance(result, dict):
            print("❌ Error: unexpected response format")
            return None

        if not result.get('ok'):
            print(f"❌ Error: {result.get('error')}")
            return None

        data = result.get('data')
        context = data.get('context') if isinstance(data, dict) else None
        if context is None:
            print("❌ Error: response is missing 'context'")
            return None

        self._write_cache(cache_key, context)
        return context

Environment Variables

Create a .env file for configuration:
# .env file
HARVEST_API_KEY=your-api-key-here
Load environment variables in your Python code (requires the python-dotenv package: pip install python-dotenv):
from dotenv import load_dotenv
import os

# Load the values defined in the .env file.
load_dotenv()

# None when HARVEST_API_KEY is not set.
API_KEY = os.environ.get("HARVEST_API_KEY")

Next Steps

I