ソースを参照

feat: implement ShopRenter product categories and tags extraction #108

- Add fetchCategory() function to shoprenter-client.ts for fetching category details
- Extract category IDs from productCategoryRelations[].category.href URLs
- Fetch and cache category details (name + description) for all products
- Clean HTML from category descriptions using cleanHtmlContent()
- Store categories as text array in Qdrant (e.g., 'CategoryName - Description')
- Extract tags from productTags[].tags field, split by comma
- Update both new/changed and reused product points with proper category/tag data
- Categories and tags now properly populated in Qdrant for AI embedding
Claude 4 ヶ月 前
コミット
25ea48a6d6

+ 9 - 0
supabase/functions/_shared/shoprenter-client.ts

@@ -620,6 +620,15 @@ export async function fetchProducts(storeId: string, page: number = 0, limit: nu
   )
 }
 
+// Fetch a single category by ID from ShopRenter: issues a GET for /categoryExtend/{categoryId}?full=1 through shopRenterApiRequest and returns that call's result unchanged.
+export async function fetchCategory(storeId: string, categoryId: string): Promise<any> {
+  return shopRenterApiRequest(
+    storeId,
+    `/categoryExtend/${categoryId}?full=1`, // categoryId is interpolated as-is; no URL encoding is applied here
+    'GET'
+  )
+}
+
 export interface ShopRenterOrderFilters {
   status?: string;           // Filter by order status
   email?: string;            // Filter by customer email (correct parameter name)

+ 130 - 16
supabase/functions/shoprenter-sync/index.ts

@@ -1,8 +1,9 @@
 import { serve } from 'https://deno.land/std@0.168.0/http/server.ts'
 import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'
 import { wrapHandler, logError } from '../_shared/error-handler.ts'
-import { fetchProducts, fetchOrders, fetchCustomers } from '../_shared/shoprenter-client.ts'
+import { fetchProducts, fetchOrders, fetchCustomers, fetchCategory } from '../_shared/shoprenter-client.ts'
 import { detectCountryCode } from '../_shared/phone-formatter.ts'
+import { cleanHtmlContent } from '../_shared/html-cleaner.ts'
 import {
   collectionExists,
   createCollection,
@@ -24,6 +25,72 @@ const corsHeaders = {
   'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
 }
 
+/**
+ * Extract category ID from ShopRenter category href URL: returns the text after the last '/' of categoryHref, or null when there is none.
+ * Example: "http://shopname.api.myshoprenter.hu/categories/Y2F0ZWdvcnktY2F0ZWdvcnlfaWQ9NTU=" -> "Y2F0ZWdvcnktY2F0ZWdvcnlfaWQ9NTU="
+ */
+function extractCategoryId(categoryHref: string): string | null {
+  if (!categoryHref) return null // empty or missing href (callers pass rel.category?.href, which may be undefined)
+  const parts = categoryHref.split('/')
+  return parts[parts.length - 1] || null // an href ending in '/' leaves an empty last segment, reported as null
+}
+
+/**
+ * Fetch and process category details from ShopRenter API via fetchCategory(); any error raised inside is caught, logged, and turned into a null result.
+ * Returns a text representation: "CategoryName - Description" (HTML cleaned), only the name or only the cleaned description when the other is empty, otherwise null.
+ */
+async function fetchCategoryText(storeId: string, categoryId: string): Promise<string | null> {
+  try {
+    const categoryData = await fetchCategory(storeId, categoryId)
+
+    // Use the first categoryDescriptions entry only (presumably Hungarian, which is typically first - not verified by this code)
+    const categoryDesc = categoryData.categoryDescriptions?.[0]
+    if (!categoryDesc) return null
+
+    const name = categoryDesc.name || ''
+    const description = categoryDesc.description || ''
+
+    // Clean HTML from description (cleanHtmlContent is defined in ../_shared/html-cleaner.ts; presumably it strips markup - confirm there)
+    const cleanDesc = cleanHtmlContent(description)
+
+    // Concatenate name and description, falling back to whichever one is non-empty
+    if (name && cleanDesc) {
+      return `${name} - ${cleanDesc}`
+    } else if (name) {
+      return name
+    } else if (cleanDesc) {
+      return cleanDesc
+    }
+
+    return null
+  } catch (error) {
+    console.error(`[ShopRenter] Failed to fetch category ${categoryId}:`, error)
+    return null
+  }
+}
+
+/**
+ * Extract and process tags from ShopRenter productTags array; returns [] when productTags is missing or not an array.
+ * ShopRenter format: [{ tags: "tag1,tag2,tag3" }]
+ * Returns: ["tag1", "tag2", "tag3"] (each tag trimmed, empty pieces dropped, duplicates removed keeping first-seen order)
+ */
+function extractTags(productTags: any[]): string[] {
+  if (!productTags || !Array.isArray(productTags)) return []
+
+  const allTags: string[] = []
+
+  for (const tagObj of productTags) {
+    // productTags[].tags is a comma-separated string; entries whose tags field is missing or not a string are skipped
+    if (tagObj.tags && typeof tagObj.tags === 'string') { // NOTE(review): a null/undefined array element would throw on this property access
+      const tags = tagObj.tags.split(',').map((t: string) => t.trim()).filter(Boolean)
+      allTags.push(...tags)
+    }
+  }
+
+  // Remove duplicates (Set keeps insertion order, so the first occurrence of each tag determines its position)
+  return [...new Set(allTags)]
+}
+
 // Log Qdrant sync operation
 async function logQdrantSync(
   supabaseAdmin: any,
@@ -157,13 +224,37 @@ async function syncProductsToQdrant(
 
     console.log(`[Qdrant] Products analysis: ${productsNeedingEmbedding.length} new/changed, ${productsToReuse.length} unchanged`)
 
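+    // Collect unique category IDs from the new/changed products only (productsNeedingEmbedding); productsToReuse is not scanned, so categories referenced solely by unchanged products are never fetched into categoryCache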
+    const uniqueCategoryIds = new Set<string>()
+    for (const product of productsNeedingEmbedding) {
+      const categoryRelations = product.productCategoryRelations || []
+      for (const rel of categoryRelations) {
+        const categoryId = extractCategoryId(rel.category?.href)
+        if (categoryId) {
+          uniqueCategoryIds.add(categoryId)
+        }
+      }
+    }
+
+    // Fetch all categories in parallel (with error handling for each)
+    console.log(`[Qdrant] Fetching ${uniqueCategoryIds.size} unique categories...`)
+    const categoryCache = new Map<string, string>()
+    const categoryPromises = Array.from(uniqueCategoryIds).map(async (categoryId) => {
+      const categoryText = await fetchCategoryText(storeId, categoryId)
+      if (categoryText) {
+        categoryCache.set(categoryId, categoryText)
+      }
+    })
+    await Promise.all(categoryPromises)
+    console.log(`[Qdrant] Fetched ${categoryCache.size} categories successfully`)
+
     // Generate text representations only for new/changed products
     const productTexts = productsNeedingEmbedding.map((product) => {
       // Extract first language description from productDescriptions array
       const productDesc = product.productDescriptions?.[0] || {}
 
-      // Extract tags from productTags array (ShopRenter structure: [{name: "tag1"}, ...])
-      const tags = (product.productTags || []).map((t: any) => t.name || t).filter(Boolean)
+      // Extract tags from productTags array (ShopRenter format: [{ tags: "tag1,tag2" }])
+      const tags = extractTags(product.productTags || [])
 
       // Extract attributes from productAttributeExtend array
       const attributes = (product.productAttributeExtend || []).map((attr: any) => {
@@ -173,12 +264,22 @@ async function syncProductsToQdrant(
         return null
       }).filter(Boolean)
 
+      // Extract categories and look up their text representations in categoryCache (no fetch happens here; uncached IDs are skipped)
+      const categoryTexts: string[] = []
+      const categoryRelations = product.productCategoryRelations || []
+      for (const rel of categoryRelations) {
+        const categoryId = extractCategoryId(rel.category?.href)
+        if (categoryId && categoryCache.has(categoryId)) {
+          categoryTexts.push(categoryCache.get(categoryId)!)
+        }
+      }
+
       return createProductText({
         name: productDesc.name || product.name,
         description: productDesc.description || null,
         short_description: productDesc.shortDescription || null,
         sku: product.sku,
-        categories: product.categories || [],
+        categories: categoryTexts,
         tags: tags,
         attributes: attributes,
         price: product.price,
@@ -201,8 +302,8 @@ async function syncProductsToQdrant(
       // Extract first language description from productDescriptions array
       const productDesc = product.productDescriptions?.[0] || {}
 
-      // Extract tags from productTags array
-      const tags = (product.productTags || []).map((t: any) => t.name || t).filter(Boolean)
+      // Extract tags from productTags array (ShopRenter format: [{ tags: "tag1,tag2" }])
+      const tags = extractTags(product.productTags || [])
 
       // Extract attributes from productAttributeExtend array
       const attributes = (product.productAttributeExtend || []).map((attr: any) => {
@@ -212,10 +313,15 @@ async function syncProductsToQdrant(
         return null
       }).filter(Boolean)
 
-      // Extract categories from productCategoryRelations (filter out href/URL fields)
-      const categories = (product.productCategoryRelations || []).map((rel: any) => ({
-        id: rel.category?.id || null,
-      })).filter((cat: any) => cat.id)
+      // Extract categories and get their text representations
+      const categoryTexts: string[] = []
+      const categoryRelations = product.productCategoryRelations || []
+      for (const rel of categoryRelations) {
+        const categoryId = extractCategoryId(rel.category?.href)
+        if (categoryId && categoryCache.has(categoryId)) {
+          categoryTexts.push(categoryCache.get(categoryId)!)
+        }
+      }
 
       // Extract manufacturer info (filter out href)
       const manufacturer = product.manufacturer?.name || null
@@ -240,8 +346,8 @@ async function syncProductsToQdrant(
           short_description: productDesc.shortDescription || null,
           meta_description: productDesc.metaDescription || null,
 
-          // Categorization
-          categories: categories,
+          // Categorization (stored as text arrays for embedding)
+          categories: categoryTexts,
           tags: tags,
 
           // Attributes
@@ -273,16 +379,24 @@ async function syncProductsToQdrant(
     // Create points for unchanged products (reuse existing embeddings and update metadata)
     const reusedPoints: QdrantPoint[] = productsToReuse.map(({ product, existingPoint }) => {
       const productDesc = product.productDescriptions?.[0] || {}
-      const tags = (product.productTags || []).map((t: any) => t.name || t).filter(Boolean)
+      const tags = extractTags(product.productTags || [])
       const attributes = (product.productAttributeExtend || []).map((attr: any) => {
         if (attr.name && attr.value) {
           return { name: attr.name, value: attr.value }
         }
         return null
       }).filter(Boolean)
-      const categories = (product.productCategoryRelations || []).map((rel: any) => ({
-        id: rel.category?.id || null,
-      })).filter((cat: any) => cat.id)
+
+      // Extract categories and get their text representations
+      const categoryTexts: string[] = []
+      const categoryRelations = product.productCategoryRelations || []
+      for (const rel of categoryRelations) {
+        const categoryId = extractCategoryId(rel.category?.href)
+        if (categoryId && categoryCache.has(categoryId)) {
+          categoryTexts.push(categoryCache.get(categoryId)!)
+        }
+      }
+
       const manufacturer = product.manufacturer?.name || null
 
       return {