Quellcode durchsuchen

fix: WooCommerce sync GDPR compliance and embedding optimization #91

This commit fixes two critical issues in the WooCommerce sync:

1. **GDPR Compliance**: Removed SQL caching for orders and customers
   - Orders/customers now sync only to Qdrant (vector DB)
   - Removed attempts to write to non-existent cache tables
   - Complies with GDPR migration 20251031_160300
   - WebUI will use real-time API for order/customer data

2. **Embedding Optimization**: Implement change detection for products
   - Compare raw_data in SQL cache before generating embeddings
   - Skip embedding generation for unchanged products
   - Only regenerate embeddings when products are new or modified
   - Significantly reduces API costs and sync time

**Changes**:
- `syncOrders()`: Removed SQL cache upserts, Qdrant-only sync
- `syncCustomers()`: Removed SQL cache upserts, Qdrant-only sync
- `syncProductsToQdrant()`: Added change detection via raw_data comparison
- Added `skipped` counter to track unchanged products

**Impact**:
- ✅ Fixes 404 errors when syncing orders/customers
- ✅ Reduces unnecessary embedding API calls
- ✅ Maintains GDPR compliance
- ✅ Improves sync performance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Claude vor 5 Monaten
Ursprung
Commit
28d1062bd8
1 geänderte Dateien mit 78 neuen und 92 gelöschten Zeilen
  1. 78 92
      supabase/functions/woocommerce-sync/index.ts

+ 78 - 92
supabase/functions/woocommerce-sync/index.ts

@@ -128,7 +128,7 @@ async function syncProductsToQdrant(
   storeName: string,
   products: WooCommerceProduct[],
   supabaseAdmin: any
-): Promise<{ synced: number; errors: number }> {
+): Promise<{ synced: number; errors: number; skipped?: number }> {
   const startTime = new Date()
   const collectionName = getCollectionName(storeName, 'products')
 
@@ -136,6 +136,7 @@ async function syncProductsToQdrant(
 
   let synced = 0
   let errors = 0
+  let skipped = 0
 
   try {
     if (!(await collectionExists(collectionName))) {
@@ -149,7 +150,7 @@ async function syncProductsToQdrant(
       ])
     }
 
-    // Get existing product IDs from Qdrant to detect deletions
+    // Get existing product IDs from Qdrant to detect deletions and changes
     const existingPoints = await scrollPoints(collectionName, {
       must: [{ key: 'store_id', match: { value: storeId } }]
     }, 1000)
@@ -158,6 +159,14 @@ async function syncProductsToQdrant(
       existingPoints.points.map((p: any) => p.payload?.product_id).filter(Boolean)
     )
 
+    // Create a map of existing products with their synced_at timestamps
+    const existingProductMap = new Map(
+      existingPoints.points.map((p: any) => [
+        p.payload?.product_id,
+        p.payload?.synced_at
+      ])
+    )
+
     const currentProductIds = new Set(products.map(p => p.id.toString()))
 
     // Find deleted products
@@ -176,8 +185,45 @@ async function syncProductsToQdrant(
       })
     }
 
-    // Generate text representations for all products
-    const productTexts = products.map((product) =>
+    // Get SQL cache data to detect changes (check raw_data hashes)
+    const { data: cachedProducts } = await supabaseAdmin
+      .from('woocommerce_products_cache')
+      .select('wc_product_id, raw_data, last_synced_at')
+      .eq('store_id', storeId)
+      .in('wc_product_id', products.map(p => p.id.toString()))
+
+    const cachedProductMap = new Map(
+      (cachedProducts || []).map((cp: any) => [cp.wc_product_id, cp])
+    )
+
+    // Filter products that need embedding updates (new or changed)
+    const productsNeedingEmbeddings = products.filter((product) => {
+      const productId = product.id.toString()
+      const cached = cachedProductMap.get(productId)
+
+      // If not in cache, it's new - needs embedding
+      if (!cached) {
+        return true
+      }
+
+      // Compare raw_data to detect changes (deep comparison)
+      const currentHash = JSON.stringify(product)
+      const cachedHash = JSON.stringify(cached.raw_data)
+
+      if (currentHash !== cachedHash) {
+        console.log(`[Qdrant] Product ${productId} has changed, regenerating embedding`)
+        return true
+      }
+
+      // Product unchanged, skip embedding generation
+      skipped++
+      return false
+    })
+
+    console.log(`[Qdrant] ${productsNeedingEmbeddings.length} products need embeddings, ${skipped} unchanged`)
+
+    // Generate text representations only for products that need updates
+    const productTexts = productsNeedingEmbeddings.map((product) =>
       createProductText({
         name: product.name,
         description: product.description,
@@ -191,13 +237,16 @@ async function syncProductsToQdrant(
       })
     )
 
-    // Generate embeddings in batch (more efficient)
-    console.log(`[Qdrant] Generating embeddings for ${productTexts.length} products...`)
-    const embeddings = await generateEmbeddingBatch(productTexts)
-    console.log(`[Qdrant] Embeddings generated successfully`)
+    // Generate embeddings in batch (more efficient) - only for changed products
+    let embeddings: number[][] = []
+    if (productTexts.length > 0) {
+      console.log(`[Qdrant] Generating embeddings for ${productTexts.length} products...`)
+      embeddings = await generateEmbeddingBatch(productTexts)
+      console.log(`[Qdrant] Embeddings generated successfully`)
+    }
 
-    // Convert products to Qdrant points with embeddings and comprehensive details
-    const points: QdrantPoint[] = products.map((product, index) => ({
+    // Convert only changed/new products to Qdrant points with embeddings
+    const points: QdrantPoint[] = productsNeedingEmbeddings.map((product, index) => ({
       id: generatePointId('woocommerce', storeId, product.id),
       vector: embeddings[index],
       payload: {
@@ -249,8 +298,11 @@ async function syncProductsToQdrant(
       }
     }))
 
-    await upsertPoints(collectionName, points)
-    synced = points.length
+    // Only upsert if there are changed products
+    if (points.length > 0) {
+      await upsertPoints(collectionName, points)
+      synced = points.length
+    }
 
     await logQdrantSync(
       supabaseAdmin,
@@ -264,7 +316,7 @@ async function syncProductsToQdrant(
       startTime
     )
 
-    console.log(`[Qdrant] Products sync complete: ${synced} synced, ${deletedProductIds.length} deleted`)
+    console.log(`[Qdrant] Products sync complete: ${synced} synced, ${skipped} skipped (unchanged), ${deletedProductIds.length} deleted`)
   } catch (error: any) {
     console.error('[Qdrant] Product sync error:', error)
     errors = products.length
@@ -282,7 +334,7 @@ async function syncProductsToQdrant(
     )
   }
 
-  return { synced, errors }
+  return { synced, errors, skipped }
 }
 
 // Sync orders to Qdrant
@@ -596,7 +648,7 @@ async function syncProducts(
   }
 }
 
-// Sync orders from WooCommerce
+// Sync orders from WooCommerce (Qdrant only - no SQL cache per GDPR compliance)
 async function syncOrders(
   storeId: string,
   storeName: string,
@@ -606,7 +658,7 @@ async function syncOrders(
   qdrantEnabled: boolean,
   canSyncOrders: boolean
 ): Promise<{ synced: number; errors: number; errorMessage?: string; qdrant?: { synced: number; errors: number } }> {
-  console.log('[WooCommerce] Syncing orders...')
+  console.log('[WooCommerce] Syncing orders (Qdrant only - no SQL cache per GDPR)...')
   let synced = 0
   let errors = 0
   let page = 1
@@ -630,42 +682,8 @@ async function syncOrders(
 
       // Collect all orders for Qdrant sync
       allOrders.push(...orders)
-
-      // Map and upsert orders to SQL cache
-      const ordersToCache = orders.map((order: WooCommerceOrder) => ({
-        store_id: storeId,
-        wc_order_id: order.id.toString(),
-        order_number: order.number || order.id.toString(),
-        status: order.status,
-        total: parseFloat(order.total) || 0,
-        currency: order.currency || 'USD',
-        customer_name: `${order.billing?.first_name || ''} ${order.billing?.last_name || ''}`.trim(),
-        customer_email: order.billing?.email || null,
-        customer_phone: formatFirstValidPhone([
-          order.billing?.phone,
-          order.shipping?.phone
-        ], countryCode),
-        line_items: order.line_items || [],
-        billing_address: order.billing || null,
-        shipping_address: order.shipping || null,
-        created_at: order.date_created || new Date().toISOString(),
-        raw_data: order,
-        last_synced_at: new Date().toISOString()
-      }))
-
-      const { error: upsertError } = await supabaseAdmin
-        .from('woocommerce_orders_cache')
-        .upsert(ordersToCache, {
-          onConflict: 'store_id,wc_order_id'
-        })
-
-      if (upsertError) {
-        console.error('[WooCommerce] Error caching orders:', upsertError)
-        errors += ordersToCache.length
-      } else {
-        synced += ordersToCache.length
-        console.log(`[WooCommerce] Cached ${ordersToCache.length} orders (page ${page})`)
-      }
+      synced += orders.length
+      console.log(`[WooCommerce] Fetched ${orders.length} orders (page ${page}) for Qdrant sync`)
 
       // Check if there are more pages
       if (orders.length < perPage) {
@@ -675,9 +693,9 @@ async function syncOrders(
       page++
     }
 
-    console.log(`[WooCommerce] Orders sync complete: ${synced} synced, ${errors} errors`)
+    console.log(`[WooCommerce] Orders fetch complete: ${synced} fetched, ${errors} errors`)
 
-    // Sync to Qdrant if enabled
+    // Sync to Qdrant if enabled (GDPR-compliant: vector database only, no SQL cache)
     let qdrantResult
     if (qdrantEnabled && allOrders.length > 0) {
       qdrantResult = await syncOrdersToQdrant(storeId, storeName, allOrders, supabaseAdmin)
@@ -692,7 +710,7 @@ async function syncOrders(
   }
 }
 
-// Sync customers from WooCommerce
+// Sync customers from WooCommerce (Qdrant only - no SQL cache per GDPR compliance)
 async function syncCustomers(
   storeId: string,
   storeName: string,
@@ -702,7 +720,7 @@ async function syncCustomers(
   qdrantEnabled: boolean,
   canSyncCustomers: boolean
 ): Promise<{ synced: number; errors: number; errorMessage?: string; qdrant?: { synced: number; errors: number } }> {
-  console.log('[WooCommerce] Syncing customers...')
+  console.log('[WooCommerce] Syncing customers (Qdrant only - no SQL cache per GDPR)...')
   let synced = 0
   let errors = 0
   let page = 1
@@ -726,40 +744,8 @@ async function syncCustomers(
 
       // Collect all customers for Qdrant sync
       allCustomers.push(...customers)
-
-      // Map and upsert customers to SQL cache
-      const customersToCache = customers.map((customer: WooCommerceCustomer) => ({
-        store_id: storeId,
-        wc_customer_id: customer.id.toString(),
-        email: customer.email || null,
-        first_name: customer.first_name || null,
-        last_name: customer.last_name || null,
-        username: customer.username || null,
-        phone: formatFirstValidPhone([
-          customer.billing?.phone,
-          customer.shipping?.phone
-        ], countryCode),
-        billing_address: customer.billing || null,
-        shipping_address: customer.shipping || null,
-        orders_count: customer.orders_count || 0,
-        total_spent: parseFloat(customer.total_spent || '0'),
-        raw_data: customer,
-        last_synced_at: new Date().toISOString()
-      }))
-
-      const { error: upsertError } = await supabaseAdmin
-        .from('woocommerce_customers_cache')
-        .upsert(customersToCache, {
-          onConflict: 'store_id,wc_customer_id'
-        })
-
-      if (upsertError) {
-        console.error('[WooCommerce] Error caching customers:', upsertError)
-        errors += customersToCache.length
-      } else {
-        synced += customersToCache.length
-        console.log(`[WooCommerce] Cached ${customersToCache.length} customers (page ${page})`)
-      }
+      synced += customers.length
+      console.log(`[WooCommerce] Fetched ${customers.length} customers (page ${page}) for Qdrant sync`)
 
       // Check if there are more pages
       if (customers.length < perPage) {
@@ -769,9 +755,9 @@ async function syncCustomers(
       page++
     }
 
-    console.log(`[WooCommerce] Customers sync complete: ${synced} synced, ${errors} errors`)
+    console.log(`[WooCommerce] Customers fetch complete: ${synced} fetched, ${errors} errors`)
 
-    // Sync to Qdrant if enabled
+    // Sync to Qdrant if enabled (GDPR-compliant: vector database only, no SQL cache)
     let qdrantResult
     if (qdrantEnabled && allCustomers.length > 0) {
       qdrantResult = await syncCustomersToQdrant(storeId, storeName, allCustomers, supabaseAdmin)