Browse Source

feat: integrate OpenRouter API for real embeddings in Qdrant sync #74

- Replace generateSimpleEmbedding with generateEmbedding using OpenRouter API
- Add generateEmbeddingBatch for efficient batch embedding generation
- Use openai/text-embedding-3-large model (3072 dimensions)
- Update all sync functions (Shopify, WooCommerce) to use batch embeddings
- Fallback to simple embeddings if OpenRouter API key not configured
- Add proper error handling and logging for embedding generation
Claude 5 months ago
parent
commit
10b3471d6f

+ 108 - 4
supabase/functions/_shared/qdrant-client.ts

@@ -347,11 +347,115 @@ export async function initializeStoreCollections(
 }
 
 /**
- * Generate a simple embedding vector (placeholder - should use OpenAI API in production)
+ * Generate embeddings using OpenRouter API with openai/text-embedding-3-large
  */
-export function generateSimpleEmbedding(text: string): number[] {
-  // This is a placeholder. In production, you should use OpenAI's text-embedding-3-large
-  // For now, generate a random normalized vector
+export async function generateEmbedding(text: string): Promise<number[]> {
+  // Contract: embeds a single text with openai/text-embedding-3-large via the
+  // OpenRouter embeddings endpoint. This function does not reject on API
+  // problems: a missing API key, a non-2xx response, a network error or a
+  // malformed payload all resolve to generateSimpleEmbedding(text) instead.
+  // NOTE(review): the fallback vector is random, so points stored with it are
+  // not semantically searchable - confirm that silent degradation is intended.
+  const OPENROUTER_API_KEY = Deno.env.get('OPENROUTER_API_KEY');
+
+  if (!OPENROUTER_API_KEY) {
+    console.error('[Qdrant] OPENROUTER_API_KEY not found, using fallback embeddings');
+    return generateSimpleEmbedding(text);
+  }
+
+  try {
+    // Truncate text if too long (the model has a token limit).
+    // 8000 characters is a conservative character-based cap, not a token count.
+    const truncatedText = text.substring(0, 8000);
+
+    const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
+        'HTTP-Referer': 'https://shopcall.ai',
+        'X-Title': 'ShopCall.ai',
+      },
+      body: JSON.stringify({
+        model: 'openai/text-embedding-3-large',
+        input: truncatedText,
+      }),
+    });
+
+    if (!response.ok) {
+      // Log the body for diagnosis, but keep it out of the thrown message.
+      const errorText = await response.text();
+      console.error(`[Qdrant] OpenRouter API error (${response.status}):`, errorText);
+      throw new Error(`OpenRouter API error: ${response.status}`);
+    }
+
+    const data = await response.json();
+    const embedding = data?.data?.[0]?.embedding;
+
+    // A truthiness check is not enough: anything that is not a non-empty array
+    // (for example an encoded string) must not be handed back as a vector.
+    if (!Array.isArray(embedding) || embedding.length === 0) {
+      throw new Error('Invalid response format from OpenRouter API');
+    }
+
+    return embedding;
+  } catch (error: unknown) {
+    console.error('[Qdrant] Error generating embedding:', error);
+    // Fallback to simple embedding if API fails
+    return generateSimpleEmbedding(text);
+  }
+}
+
+/**
+ * Generate embeddings for multiple texts with a single OpenRouter request.
+ * More efficient than calling generateEmbedding multiple times.
+ *
+ * The resolved array always holds exactly one vector per input text, in input
+ * order, so callers can safely pair `texts[i]` with `result[i]`.
+ *
+ * @param texts - Texts to embed. Each one is truncated to 8000 characters
+ *   before it is sent.
+ * @returns One embedding per input text. When the API key is missing, the
+ *   request fails, or the response is malformed or incomplete, every text
+ *   gets a vector from generateSimpleEmbedding instead (the function does not
+ *   reject on API problems).
+ */
+export async function generateEmbeddingBatch(texts: string[]): Promise<number[][]> {
+  // Nothing to embed: skip the env lookup, the error log and the network call.
+  if (texts.length === 0) {
+    return [];
+  }
+
+  const OPENROUTER_API_KEY = Deno.env.get('OPENROUTER_API_KEY');
+
+  if (!OPENROUTER_API_KEY) {
+    console.error('[Qdrant] OPENROUTER_API_KEY not found, using fallback embeddings');
+    return texts.map(text => generateSimpleEmbedding(text));
+  }
+
+  try {
+    // Truncate texts if too long
+    const truncatedTexts = texts.map(text => text.substring(0, 8000));
+
+    const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
+        'HTTP-Referer': 'https://shopcall.ai',
+        'X-Title': 'ShopCall.ai',
+      },
+      body: JSON.stringify({
+        model: 'openai/text-embedding-3-large',
+        input: truncatedTexts,
+      }),
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      console.error(`[Qdrant] OpenRouter API error (${response.status}):`, errorText);
+      throw new Error(`OpenRouter API error: ${response.status}`);
+    }
+
+    const data = await response.json();
+
+    if (!data || !Array.isArray(data.data)) {
+      throw new Error('Invalid response format from OpenRouter API');
+    }
+
+    // Callers index the result by input position, so a short or long response
+    // would silently produce points with an undefined vector.
+    if (data.data.length !== texts.length) {
+      throw new Error(
+        `OpenRouter API returned ${data.data.length} embeddings for ${texts.length} inputs`
+      );
+    }
+
+    // Place every embedding in the slot named by its `index` field rather than
+    // sorting in place: this leaves the parsed response untouched and rejects
+    // missing, duplicate or out-of-range indices explicitly.
+    const embeddings: number[][] = new Array(texts.length);
+    data.data.forEach((item: any, position: number) => {
+      // Fall back to the response position when the item carries no index.
+      const slot = Number.isInteger(item?.index) ? item.index : position;
+      const slotIsFree = slot >= 0 && slot < texts.length && embeddings[slot] === undefined;
+
+      if (!slotIsFree || !Array.isArray(item?.embedding) || item.embedding.length === 0) {
+        throw new Error('Invalid response format from OpenRouter API');
+      }
+
+      embeddings[slot] = item.embedding;
+    });
+
+    // Equal lengths plus unique in-range slots guarantee every slot is filled.
+    return embeddings;
+  } catch (error: unknown) {
+    console.error('[Qdrant] Error generating batch embeddings:', error);
+    // Fallback to simple embeddings if API fails
+    return texts.map(text => generateSimpleEmbedding(text));
+  }
+}
+
+/**
+ * Generate a simple embedding vector (fallback when API unavailable)
+ */
+function generateSimpleEmbedding(text: string): number[] {
+  // Fallback: generate a random normalized vector
   const vector = new Array(VECTOR_SIZE).fill(0).map(() => Math.random() - 0.5);
 
   // Normalize the vector

+ 84 - 61
supabase/functions/shopify-sync/index.ts

@@ -18,7 +18,7 @@ import {
   scrollPoints,
   getCollectionName,
   initializeStoreCollections,
-  generateSimpleEmbedding,
+  generateEmbeddingBatch,
   createProductText,
   createOrderText,
   createCustomerText,
@@ -154,10 +154,10 @@ async function syncProductsToQdrant(
       })
     }
 
-    // Convert products to Qdrant points
-    const points: QdrantPoint[] = products.map((product) => {
+    // Generate text representations for all products
+    const productTexts = products.map((product) => {
       const primaryVariant = product.variants?.[0]
-      const productText = createProductText({
+      return createProductText({
         title: product.title,
         description: product.body_html,
         sku: primaryVariant?.sku,
@@ -168,10 +168,19 @@ async function syncProductsToQdrant(
         price: primaryVariant?.price,
         status: product.status,
       })
+    })
+
+    // Generate embeddings in batch
+    console.log(`[Qdrant] Generating embeddings for ${productTexts.length} products...`)
+    const embeddings = await generateEmbeddingBatch(productTexts)
+    console.log(`[Qdrant] Embeddings generated successfully`)
 
+    // Convert products to Qdrant points with embeddings
+    const points: QdrantPoint[] = products.map((product, index) => {
+      const primaryVariant = product.variants?.[0]
       return {
         id: `shopify-${storeId}-${product.id}`,
-        vector: generateSimpleEmbedding(productText),
+        vector: embeddings[index],
         payload: {
           store_id: storeId,
           product_id: product.id.toString(),
@@ -255,8 +264,9 @@ async function syncOrdersToQdrant(
       ])
     }
 
-    const points: QdrantPoint[] = orders.map((order) => {
-      const orderText = createOrderText({
+    // Generate text representations for all orders
+    const orderTexts = orders.map((order) =>
+      createOrderText({
         order_number: order.order_number,
         name: order.name,
         customer_name: order.customer ? `${order.customer.first_name || ''} ${order.customer.last_name || ''}`.trim() : null,
@@ -273,36 +283,42 @@ async function syncOrdersToQdrant(
         line_items: order.line_items,
         note: order.note,
       })
+    )
 
-      return {
-        id: `shopify-${storeId}-${order.id}`,
-        vector: generateSimpleEmbedding(orderText),
-        payload: {
-          store_id: storeId,
-          order_id: order.id.toString(),
-          platform: 'shopify',
-          order_number: order.order_number.toString(),
-          name: order.name,
-          email: order.email || null,
-          customer_name: order.customer ? `${order.customer.first_name || ''} ${order.customer.last_name || ''}`.trim() : null,
-          customer_email: order.customer?.email || order.email || null,
-          customer_phone: order.customer?.phone || order.billing_address?.phone || null,
-          phone: order.customer?.phone || order.billing_address?.phone || null,
-          financial_status: order.financial_status,
-          fulfillment_status: order.fulfillment_status || null,
-          total_price: parseFloat(order.current_total_price) || 0,
-          currency: order.currency,
-          billing_address: order.billing_address || null,
-          billing_city: order.billing_address?.city || null,
-          billing_country: order.billing_address?.country || null,
-          shipping_address: order.shipping_address || null,
-          shipping_city: order.shipping_address?.city || null,
-          shipping_country: order.shipping_address?.country || null,
-          note: order.note || null,
-          synced_at: new Date().toISOString(),
-        }
+    // Generate embeddings in batch
+    console.log(`[Qdrant] Generating embeddings for ${orderTexts.length} orders...`)
+    const embeddings = await generateEmbeddingBatch(orderTexts)
+    console.log(`[Qdrant] Embeddings generated successfully`)
+
+    // Convert orders to Qdrant points with embeddings
+    const points: QdrantPoint[] = orders.map((order, index) => ({
+      id: `shopify-${storeId}-${order.id}`,
+      vector: embeddings[index],
+      payload: {
+        store_id: storeId,
+        order_id: order.id.toString(),
+        platform: 'shopify',
+        order_number: order.order_number.toString(),
+        name: order.name,
+        email: order.email || null,
+        customer_name: order.customer ? `${order.customer.first_name || ''} ${order.customer.last_name || ''}`.trim() : null,
+        customer_email: order.customer?.email || order.email || null,
+        customer_phone: order.customer?.phone || order.billing_address?.phone || null,
+        phone: order.customer?.phone || order.billing_address?.phone || null,
+        financial_status: order.financial_status,
+        fulfillment_status: order.fulfillment_status || null,
+        total_price: parseFloat(order.current_total_price) || 0,
+        currency: order.currency,
+        billing_address: order.billing_address || null,
+        billing_city: order.billing_address?.city || null,
+        billing_country: order.billing_address?.country || null,
+        shipping_address: order.shipping_address || null,
+        shipping_city: order.shipping_address?.city || null,
+        shipping_country: order.shipping_address?.country || null,
+        note: order.note || null,
+        synced_at: new Date().toISOString(),
       }
-    })
+    }))
 
     await upsertPoints(collectionName, points)
     synced = points.length
@@ -365,8 +381,9 @@ async function syncCustomersToQdrant(
       ])
     }
 
-    const points: QdrantPoint[] = customers.map((customer) => {
-      const customerText = createCustomerText({
+    // Generate text representations for all customers
+    const customerTexts = customers.map((customer) =>
+      createCustomerText({
         first_name: customer.first_name,
         last_name: customer.last_name,
         email: customer.email,
@@ -380,32 +397,38 @@ async function syncCustomersToQdrant(
         accepts_marketing: customer.accepts_marketing,
         tags: customer.tags ? customer.tags.split(',').map(t => t.trim()) : [],
       })
+    )
 
-      return {
-        id: `shopify-${storeId}-${customer.id}`,
-        vector: generateSimpleEmbedding(customerText),
-        payload: {
-          store_id: storeId,
-          customer_id: customer.id.toString(),
-          platform: 'shopify',
-          email: customer.email,
-          first_name: customer.first_name || null,
-          last_name: customer.last_name || null,
-          phone: customer.phone || customer.default_address?.phone || null,
-          default_address: customer.default_address || null,
-          city: customer.default_address?.city || null,
-          country: customer.default_address?.country || null,
-          addresses: customer.addresses || [],
-          orders_count: customer.orders_count || 0,
-          total_spent: parseFloat(customer.total_spent) || 0,
-          currency: customer.currency || 'USD',
-          state: customer.state,
-          accepts_marketing: customer.accepts_marketing || false,
-          tags: customer.tags ? customer.tags.split(',').map(t => t.trim()) : [],
-          synced_at: new Date().toISOString(),
-        }
+    // Generate embeddings in batch
+    console.log(`[Qdrant] Generating embeddings for ${customerTexts.length} customers...`)
+    const embeddings = await generateEmbeddingBatch(customerTexts)
+    console.log(`[Qdrant] Embeddings generated successfully`)
+
+    // Convert customers to Qdrant points with embeddings
+    const points: QdrantPoint[] = customers.map((customer, index) => ({
+      id: `shopify-${storeId}-${customer.id}`,
+      vector: embeddings[index],
+      payload: {
+        store_id: storeId,
+        customer_id: customer.id.toString(),
+        platform: 'shopify',
+        email: customer.email,
+        first_name: customer.first_name || null,
+        last_name: customer.last_name || null,
+        phone: customer.phone || customer.default_address?.phone || null,
+        default_address: customer.default_address || null,
+        city: customer.default_address?.city || null,
+        country: customer.default_address?.country || null,
+        addresses: customer.addresses || [],
+        orders_count: customer.orders_count || 0,
+        total_spent: parseFloat(customer.total_spent) || 0,
+        currency: customer.currency || 'USD',
+        state: customer.state,
+        accepts_marketing: customer.accepts_marketing || false,
+        tags: customer.tags ? customer.tags.split(',').map(t => t.trim()) : [],
+        synced_at: new Date().toISOString(),
       }
-    })
+    }))
 
     await upsertPoints(collectionName, points)
     synced = points.length

+ 91 - 71
supabase/functions/woocommerce-sync/index.ts

@@ -18,7 +18,7 @@ import {
   scrollPoints,
   getCollectionName,
   initializeStoreCollections,
-  generateSimpleEmbedding,
+  generateEmbeddingBatch,
   createProductText,
   createOrderText,
   createCustomerText,
@@ -175,9 +175,9 @@ async function syncProductsToQdrant(
       })
     }
 
-    // Convert products to Qdrant points
-    const points: QdrantPoint[] = products.map((product) => {
-      const productText = createProductText({
+    // Generate text representations for all products
+    const productTexts = products.map((product) =>
+      createProductText({
         name: product.name,
         description: product.description,
         short_description: product.short_description,
@@ -188,24 +188,30 @@ async function syncProductsToQdrant(
         price: product.price,
         stock_status: product.stock_status,
       })
+    )
 
-      return {
-        id: `woocommerce-${storeId}-${product.id}`,
-        vector: generateSimpleEmbedding(productText),
-        payload: {
-          store_id: storeId,
-          product_id: product.id.toString(),
-          platform: 'woocommerce',
-          name: product.name,
-          sku: product.sku || null,
-          price: parseFloat(product.price) || 0,
-          stock_status: product.stock_status,
-          stock_quantity: product.stock_quantity,
-          type: product.type || 'simple',
-          synced_at: new Date().toISOString(),
-        }
+    // Generate embeddings in batch (more efficient)
+    console.log(`[Qdrant] Generating embeddings for ${productTexts.length} products...`)
+    const embeddings = await generateEmbeddingBatch(productTexts)
+    console.log(`[Qdrant] Embeddings generated successfully`)
+
+    // Convert products to Qdrant points with embeddings
+    const points: QdrantPoint[] = products.map((product, index) => ({
+      id: `woocommerce-${storeId}-${product.id}`,
+      vector: embeddings[index],
+      payload: {
+        store_id: storeId,
+        product_id: product.id.toString(),
+        platform: 'woocommerce',
+        name: product.name,
+        sku: product.sku || null,
+        price: parseFloat(product.price) || 0,
+        stock_status: product.stock_status,
+        stock_quantity: product.stock_quantity,
+        type: product.type || 'simple',
+        synced_at: new Date().toISOString(),
       }
-    })
+    }))
 
     await upsertPoints(collectionName, points)
     synced = points.length
@@ -270,8 +276,9 @@ async function syncOrdersToQdrant(
       ])
     }
 
-    const points: QdrantPoint[] = orders.map((order) => {
-      const orderText = createOrderText({
+    // Generate text representations for all orders
+    const orderTexts = orders.map((order) =>
+      createOrderText({
         order_number: order.number || order.id,
         customer_name: `${order.billing?.first_name || ''} ${order.billing?.last_name || ''}`.trim(),
         customer_email: order.billing?.email,
@@ -286,34 +293,40 @@ async function syncOrdersToQdrant(
         line_items: order.line_items,
         note: order.customer_note,
       })
+    )
 
-      return {
-        id: `woocommerce-${storeId}-${order.id}`,
-        vector: generateSimpleEmbedding(orderText),
-        payload: {
-          store_id: storeId,
-          order_id: order.id.toString(),
-          platform: 'woocommerce',
-          order_number: order.number || order.id.toString(),
-          status: order.status,
-          total: parseFloat(order.total) || 0,
-          total_price: parseFloat(order.total) || 0,
-          currency: order.currency || 'USD',
-          customer_name: `${order.billing?.first_name || ''} ${order.billing?.last_name || ''}`.trim(),
-          customer_email: order.billing?.email || null,
-          customer_phone: order.billing?.phone || null,
-          phone: order.billing?.phone || null,
-          billing_address: order.billing || null,
-          billing_city: order.billing?.city || null,
-          billing_country: order.billing?.country || null,
-          shipping_address: order.shipping || null,
-          shipping_city: order.shipping?.city || null,
-          shipping_country: order.shipping?.country || null,
-          note: order.customer_note || null,
-          synced_at: new Date().toISOString(),
-        }
+    // Generate embeddings in batch
+    console.log(`[Qdrant] Generating embeddings for ${orderTexts.length} orders...`)
+    const embeddings = await generateEmbeddingBatch(orderTexts)
+    console.log(`[Qdrant] Embeddings generated successfully`)
+
+    // Convert orders to Qdrant points with embeddings
+    const points: QdrantPoint[] = orders.map((order, index) => ({
+      id: `woocommerce-${storeId}-${order.id}`,
+      vector: embeddings[index],
+      payload: {
+        store_id: storeId,
+        order_id: order.id.toString(),
+        platform: 'woocommerce',
+        order_number: order.number || order.id.toString(),
+        status: order.status,
+        total: parseFloat(order.total) || 0,
+        total_price: parseFloat(order.total) || 0,
+        currency: order.currency || 'USD',
+        customer_name: `${order.billing?.first_name || ''} ${order.billing?.last_name || ''}`.trim(),
+        customer_email: order.billing?.email || null,
+        customer_phone: order.billing?.phone || null,
+        phone: order.billing?.phone || null,
+        billing_address: order.billing || null,
+        billing_city: order.billing?.city || null,
+        billing_country: order.billing?.country || null,
+        shipping_address: order.shipping || null,
+        shipping_city: order.shipping?.city || null,
+        shipping_country: order.shipping?.country || null,
+        note: order.customer_note || null,
+        synced_at: new Date().toISOString(),
       }
-    })
+    }))
 
     await upsertPoints(collectionName, points)
     synced = points.length
@@ -376,8 +389,9 @@ async function syncCustomersToQdrant(
       ])
     }
 
-    const points: QdrantPoint[] = customers.map((customer) => {
-      const customerText = createCustomerText({
+    // Generate text representations for all customers
+    const customerTexts = customers.map((customer) =>
+      createCustomerText({
         first_name: customer.first_name,
         last_name: customer.last_name,
         username: customer.username,
@@ -388,29 +402,35 @@ async function syncCustomersToQdrant(
         orders_count: customer.orders_count,
         total_spent: customer.total_spent,
       })
+    )
 
-      return {
-        id: `woocommerce-${storeId}-${customer.id}`,
-        vector: generateSimpleEmbedding(customerText),
-        payload: {
-          store_id: storeId,
-          customer_id: customer.id.toString(),
-          platform: 'woocommerce',
-          email: customer.email || null,
-          first_name: customer.first_name || null,
-          last_name: customer.last_name || null,
-          username: customer.username || null,
-          phone: customer.billing?.phone || customer.shipping?.phone || null,
-          billing_address: customer.billing || null,
-          shipping_address: customer.shipping || null,
-          city: customer.billing?.city || customer.shipping?.city || null,
-          country: customer.billing?.country || customer.shipping?.country || null,
-          orders_count: customer.orders_count || 0,
-          total_spent: parseFloat(customer.total_spent || '0'),
-          synced_at: new Date().toISOString(),
-        }
+    // Generate embeddings in batch
+    console.log(`[Qdrant] Generating embeddings for ${customerTexts.length} customers...`)
+    const embeddings = await generateEmbeddingBatch(customerTexts)
+    console.log(`[Qdrant] Embeddings generated successfully`)
+
+    // Convert customers to Qdrant points with embeddings
+    const points: QdrantPoint[] = customers.map((customer, index) => ({
+      id: `woocommerce-${storeId}-${customer.id}`,
+      vector: embeddings[index],
+      payload: {
+        store_id: storeId,
+        customer_id: customer.id.toString(),
+        platform: 'woocommerce',
+        email: customer.email || null,
+        first_name: customer.first_name || null,
+        last_name: customer.last_name || null,
+        username: customer.username || null,
+        phone: customer.billing?.phone || customer.shipping?.phone || null,
+        billing_address: customer.billing || null,
+        shipping_address: customer.shipping || null,
+        city: customer.billing?.city || customer.shipping?.city || null,
+        country: customer.billing?.country || customer.shipping?.country || null,
+        orders_count: customer.orders_count || 0,
+        total_spent: parseFloat(customer.total_spent || '0'),
+        synced_at: new Date().toISOString(),
       }
-    })
+    }))
 
     await upsertPoints(collectionName, points)
     synced = points.length