Browse Source

docs: add scraper API documentation for reference

Fszontagh 4 months ago
parent
commit
936cf53eff
1 changed files with 929 additions and 0 deletions
  1. 929 0
      docs/scraper_docs.html

+ 929 - 0
docs/scraper_docs.html

@@ -0,0 +1,929 @@
+
+      <!DOCTYPE html>
+      <html lang="en">
+        <head>
+          <!-- Declare the encoding before any other content: the page body contains
+               non-ASCII characters (the lock emoji on the "Auth Required" badges). -->
+          <meta charset="utf-8">
+          <!-- Lay the page out at device width on small screens; zoom stays enabled. -->
+          <meta name="viewport" content="width=device-width, initial-scale=1">
+          <title>Webshop Scraper API Documentation</title>
+          <style>
+            /* Page frame: centered column capped at 1200px. */
+            body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
+
+            /* One bordered card per endpoint; the header row holds method badge, path and auth badge. */
+            .endpoint { border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 5px; }
+            .endpoint-header { margin-bottom: 10px; display: flex; align-items: center; gap: 10px; }
+
+            /* HTTP method badge: shared shape in .method, one background color per method. */
+            .method { padding: 4px 8px; border-radius: 3px; color: white; font-size: 12px; font-weight: bold; }
+            .method-get { background-color: #61affe; }
+            .method-post { background-color: #49cc90; }
+            .method-put { background-color: #fca130; }
+            .method-patch { background-color: #50e3c2; }
+            .method-delete { background-color: #f93e3e; }
+
+            /* Route path and the badge shown on endpoints that require authentication. */
+            .path { font-family: monospace; background: #f5f5f5; padding: 4px 8px; border-radius: 3px; }
+            .auth-required { background: #ff6b6b; color: white; padding: 2px 6px; border-radius: 3px; font-size: 11px; }
+
+            /* Parameter and response lists; .required styles the "*" marker on mandatory parameters. */
+            .parameters, .responses { margin-left: 20px; }
+            .param-type { color: #666; font-style: italic; }
+            .required { color: red; }
+
+            /* Request-body schema samples scroll horizontally instead of wrapping. */
+            pre { background: #f5f5f5; padding: 10px; border-radius: 3px; overflow-x: auto; }
+
+            /* Headings: h3 = endpoint group, h4 = endpoint summary, h5 = sub-section label. */
+            h3 { color: #333; border-bottom: 2px solid #eee; padding-bottom: 10px; }
+            h4 { margin: 0 0 10px 0; color: #555; }
+            h5 { margin: 15px 0 5px 0; color: #666; }
+          </style>
+        </head>
+        <body>
+          <h1>Webshop Scraper API Documentation</h1>
+          <p>Generated automatically from registered endpoints.</p>
+          
+          <section>
+            <h3>Health</h3>
+            
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/health</span>
+              
+            </div>
+            <h4>Health check endpoint</h4>
+            <p>Returns the health status of the application and queue statistics</p>
+
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Application health status</li>
+                
+              </ul>
+            
+          </div>
+        
+          </section>
+        
+          <section>
+            <h3>Jobs</h3>
+            
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-post">POST</span>
+              <span class="path">/api/jobs</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Create a new scraping job</h4>
+            <p>Creates a new job to scrape a webshop. If a database is available, creates or finds the shop record.</p>
+
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "url": {
+      "type": "string",
+      "format": "url",
+      "description": "The URL of the webshop to scrape"
+    },
+    "custom_id": {
+      "type": "string",
+      "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
+      "description": "Optional custom UUID for the shop"
+    }
+  },
+  "required": [
+    "url"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>201</strong> - Job created successfully</li>
+                
+                  <li><strong>400</strong> - Invalid request data</li>
+                
+                  <li><strong>409</strong> - Custom ID already in use</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/jobs/:id</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Get job status and result</h4>
+            <p>Retrieves the status and result of a specific job</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Job ID
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Job details</li>
+                
+                  <li><strong>404</strong> - Job not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/jobs</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>List all jobs</h4>
+            <p>Retrieves a list of all jobs with their current status and queue statistics</p>
+
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - List of jobs</li>
+                
+              </ul>
+            
+          </div>
+        
+          </section>
+        
+          <section>
+            <h3>Shops</h3>
+            
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/shops</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>List all shops</h4>
+            <p>Retrieves a list of all shops with their analytics</p>
+
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - List of shops with analytics</li>
+                
+                  <li><strong>503</strong> - Database not available</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/shops/:id</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Get detailed shop information</h4>
+            <p>Retrieves detailed information about a specific shop including analytics, content metadata, scrape history, and scheduled jobs</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or custom UUID)
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Detailed shop information</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/shops/:id/results</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Get shop results with full content</h4>
+            <p>Retrieves scraped content for a shop with optional filtering</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or custom UUID)
+                  </li>
+                
+                  <li>
+                    <strong>limit</strong>
+                    <span class="param-type">(query: number)</span>
+                    
+                     - Maximum number of results to return
+                  </li>
+                
+                  <li>
+                    <strong>date_from</strong>
+                    <span class="param-type">(query: string)</span>
+                    
+                     - Filter results from this date (ISO format)
+                  </li>
+                
+                  <li>
+                    <strong>date_to</strong>
+                    <span class="param-type">(query: string)</span>
+                    
+                     - Filter results up to this date (ISO format)
+                  </li>
+                
+                  <li>
+                    <strong>content_type</strong>
+                    <span class="param-type">(query: string)</span>
+                    
+                     - Filter by content type: shipping, contacts, terms, faq
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Shop results with content</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-patch">PATCH</span>
+              <span class="path">/api/shops/:id/schedule</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Enable or disable scheduled scraping</h4>
+            <p>Updates the scheduling status for a shop</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID
+                  </li>
+                
+              </ul>
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "enabled": {
+      "type": "boolean",
+      "description": "Whether to enable or disable scheduling"
+    }
+  },
+  "required": [
+    "enabled"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Schedule status updated</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-patch">PATCH</span>
+              <span class="path">/api/shops/:id/custom-id</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Set or update custom ID for a shop</h4>
+            <p>Updates the custom UUID identifier for a shop</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or current custom UUID)
+                  </li>
+                
+              </ul>
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "custom_id": {
+      "type": "string",
+      "nullable": true,
+      "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
+      "description": "Custom UUID or null to remove"
+    }
+  },
+  "required": [
+    "custom_id"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Custom ID updated</li>
+                
+                  <li><strong>400</strong> - Invalid custom ID format</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+                  <li><strong>409</strong> - Custom ID already in use</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-delete">DELETE</span>
+              <span class="path">/api/shops/:id</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Delete a shop and all related data</h4>
+            <p>Permanently removes a shop and all associated data including scrape history, scheduled jobs, and content</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or custom UUID)
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Shop deleted successfully</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          </section>
+        
+          <section>
+            <h3>Webhooks</h3>
+            
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-post">POST</span>
+              <span class="path">/api/shops/:id/webhooks</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Create or replace webhook for a shop</h4>
+            <p>Creates a new webhook or updates an existing one for the specified shop</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID
+                  </li>
+                
+              </ul>
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "url": {
+      "type": "string",
+      "format": "url",
+      "description": "Webhook URL"
+    },
+    "secret": {
+      "type": "string",
+      "description": "Optional webhook secret for verification"
+    }
+  },
+  "required": [
+    "url"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Webhook updated</li>
+                
+                  <li><strong>201</strong> - Webhook created</li>
+                
+                  <li><strong>400</strong> - Invalid webhook URL</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/shops/:id/webhooks</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Get webhook configuration for a shop</h4>
+            <p>Retrieves webhook configuration and recent delivery history</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or custom UUID)
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Webhook configuration and delivery history</li>
+                
+                  <li><strong>404</strong> - Shop or webhook not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-patch">PATCH</span>
+              <span class="path">/api/shops/:id/webhooks</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Enable or disable webhook</h4>
+            <p>Updates the enabled status of a webhook</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID
+                  </li>
+                
+              </ul>
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "enabled": {
+      "type": "boolean",
+      "description": "Whether to enable or disable the webhook"
+    }
+  },
+  "required": [
+    "enabled"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Webhook status updated</li>
+                
+                  <li><strong>404</strong> - Shop or webhook not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-delete">DELETE</span>
+              <span class="path">/api/shops/:id/webhooks</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Delete webhook for a shop</h4>
+            <p>Permanently removes the webhook configuration for a shop</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or custom UUID)
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Webhook deleted successfully</li>
+                
+                  <li><strong>404</strong> - Shop or webhook not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          </section>
+        
+          <section>
+            <h3>Custom URLs</h3>
+            
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-post">POST</span>
+              <span class="path">/api/shops/:id/custom-urls</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Add a custom URL to scrape for a shop</h4>
+            <p>Adds a custom URL to be scraped for a specific content type</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID
+                  </li>
+                
+              </ul>
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "url": {
+      "type": "string",
+      "format": "url",
+      "description": "URL to scrape (must belong to the same domain as the shop)"
+    },
+    "content_type": {
+      "type": "string",
+      "enum": [
+        "shipping",
+        "contacts",
+        "terms",
+        "faq"
+      ],
+      "description": "Type of content to scrape from this URL"
+    }
+  },
+  "required": [
+    "url",
+    "content_type"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>201</strong> - Custom URL added successfully</li>
+                
+                  <li><strong>400</strong> - Invalid request data or URL not in same domain</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+                  <li><strong>409</strong> - URL already exists or already scraped</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/api/shops/:id/custom-urls</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>List all custom URLs for a shop</h4>
+            <p>Retrieves all custom URLs configured for a shop</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID (internal UUID or custom UUID)
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - List of custom URLs</li>
+                
+                  <li><strong>404</strong> - Shop not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-patch">PATCH</span>
+              <span class="path">/api/shops/:id/custom-urls/:customUrlId</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Enable or disable a custom URL</h4>
+            <p>Updates the enabled status of a custom URL</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID
+                  </li>
+                
+                  <li>
+                    <strong>customUrlId</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Custom URL ID
+                  </li>
+                
+              </ul>
+            
+
+            
+              <h5>Request Body</h5>
+              <pre><code>{
+  "type": "object",
+  "properties": {
+    "enabled": {
+      "type": "boolean",
+      "description": "Whether to enable or disable the custom URL"
+    }
+  },
+  "required": [
+    "enabled"
+  ]
+}</code></pre>
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Custom URL status updated</li>
+                
+                  <li><strong>403</strong> - Custom URL does not belong to this shop</li>
+                
+                  <li><strong>404</strong> - Shop or custom URL not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-delete">DELETE</span>
+              <span class="path">/api/shops/:id/custom-urls/:customUrlId</span>
+              <span class="auth-required">🔒 Auth Required</span>
+            </div>
+            <h4>Delete a custom URL</h4>
+            <p>Permanently removes a custom URL from the shop configuration</p>
+
+            
+              <h5>Parameters</h5>
+              <ul class="parameters">
+                
+                  <li>
+                    <strong>id</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Shop ID
+                  </li>
+                
+                  <li>
+                    <strong>customUrlId</strong>
+                    <span class="param-type">(path: string)</span>
+                    <span class="required">*</span>
+                     - Custom URL ID
+                  </li>
+                
+              </ul>
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - Custom URL deleted successfully</li>
+                
+                  <li><strong>403</strong> - Custom URL does not belong to this shop</li>
+                
+                  <li><strong>404</strong> - Shop or custom URL not found</li>
+                
+              </ul>
+            
+          </div>
+        
+          </section>
+        
+          <section>
+            <h3>Documentation</h3>
+            
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/doc</span>
+              
+            </div>
+            <h4>Get API Documentation</h4>
+            <p>Returns interactive HTML documentation for all registered API endpoints</p>
+
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - HTML documentation page</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/doc/openapi</span>
+              
+            </div>
+            <h4>Get OpenAPI Specification</h4>
+            <p>Returns the OpenAPI 3.0 specification for the API in JSON format</p>
+
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - OpenAPI specification</li>
+                
+              </ul>
+            
+          </div>
+        
+          <div class="endpoint">
+            <div class="endpoint-header">
+              <span class="method method-get">GET</span>
+              <span class="path">/doc/endpoints</span>
+              
+            </div>
+            <h4>List All Endpoints</h4>
+            <p>Returns a JSON list of all registered endpoints with their metadata</p>
+
+            
+
+            
+
+            
+              <h5>Responses</h5>
+              <ul class="responses">
+                
+                  <li><strong>200</strong> - List of endpoints</li>
+                
+              </ul>
+            
+          </div>
+        
+          </section>
+        
+        </body>
+      </html>
+