🔥 Scraper Improvements: - Add JSON-LD price extraction (regression fix) - Fix sleeping spotsPerUnit bug (was hardcoded to 2) - Remove stale CSS selectors, add robust fallbacks - Add JSON-LD price fallback in extraction pipeline - Improve sleeping parser regex (lastIndex bug fix) - Add 15+ new bed type patterns (murphy, day bed, hammock, plurals) - Smarter deriveSleepingFromBeds() with mixed bed logic 📅 Import Form UX: - Smart defaults (next weekend dates) - Auto-calculate nights display - URL param auto-detection (?check_in=&check_out=&adults=) - Better visual hierarchy with icons - Progress steps during import - Success redirect to listing detail page 🗑️ Delete Button Fix: - Add router.refresh() after successful delete - Inline error state instead of alert() - Admin delete button as proper client component ✏️ Edit/Admin Fixes: - Fix revalidatePath using slug instead of id - Fix redirect to detail page after edit - Add cascade delete logic to admin deleteListing - Extract delete to proper client component 🎨 UI States for Partial Data: - Price: 'Preis auf Anfrage' with context hint - Location: 'Ort nicht erkannt' instead of empty - Sleeping: placeholder when no data - Suitability: 3-state (yes/no/unknown) - Use formatPrice/formatRating utilities 🛏️ Sleeping Data Quality: - Add sleepingDataQuality to Prisma schema - Save quality (EXACT/DERIVED/UNKNOWN) to DB - Display '(geschätzt)' label for derived data 📊 Database: - Restore corrupted schema.prisma from git - Add sleepingDataQuality field - Push schema changes ✅ TypeScript: Zero errors ✅ Build: Successful
128 lines
4.5 KiB
TypeScript
128 lines
4.5 KiB
TypeScript
/**
 * Test script for the Puppeteer-based Airbnb scraper.
 * Run with: npx tsx test-scraper.ts
 */
|
|
|
|
import { scrapeAirbnbWithPuppeteer } from './src/lib/airbnb/puppeteer-scraper';
|
|
|
|
// Listing URL that main() passes to the scraper when this script is run.
const TEST_URL = 'https://www.airbnb.com/rooms/52367822'; // Valid listing in Bad Bellingen, Germany
|
|
|
|
/**
 * Formats an optional scraped value for console output.
 *
 * Only `null`, `undefined`, `NaN` and the empty string count as missing, so
 * legitimate falsy values such as `0` are printed instead of being replaced
 * by 'N/A' (which is what a plain `value || 'N/A'` would do).
 *
 * @param value  The raw value to print.
 * @param suffix Text appended only when a value is present (e.g. ' EUR').
 * @returns The printable text, or 'N/A' when the value is missing.
 */
function formatValue(value: unknown, suffix = ''): string {
  const isMissing =
    value === null ||
    value === undefined ||
    value === '' ||
    (typeof value === 'number' && Number.isNaN(value));
  return isMissing ? 'N/A' : `${String(value)}${suffix}`;
}

/**
 * Shortens `text` to at most `maxLength` characters, appending '...' only
 * when something was actually cut off.
 */
function truncate(text: string, maxLength: number): string {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/** Returns the seconds elapsed since `startTime` (ms epoch), with one decimal. */
function elapsedSeconds(startTime: number): string {
  return ((Date.now() - startTime) / 1000).toFixed(1);
}

/**
 * Runs the scraper against TEST_URL and prints every extracted field to the
 * console, grouped by section (title, price, location, rating, capacity,
 * sleeping options, host, images, amenities, description, external id and
 * extraction log).
 *
 * Errors thrown by the scraper are caught and logged together with the
 * elapsed time; a null result is reported and ends the run early.
 */
async function main(): Promise<void> {
  const divider = '========================================';

  console.log(divider);
  console.log('Airbnb Puppeteer Scraper Test');
  console.log(`${divider}\n`);

  console.log(`Testing URL: ${TEST_URL}\n`);
  console.log('Starting scraper (this may take 30-60 seconds)...\n');

  const startTime = Date.now();

  try {
    const result = await scrapeAirbnbWithPuppeteer(TEST_URL);

    console.log(`\n✅ Scraping completed in ${elapsedSeconds(startTime)}s\n`);

    if (!result) {
      console.log('❌ Result is null - scraping may have failed');
      return;
    }

    console.log(divider);
    console.log('EXTRACTED DATA');
    console.log(`${divider}\n`);

    // Title
    console.log('📌 TITLE:');
    console.log(`   Value: ${formatValue(result.title?.value)}`);
    console.log(`   Source: ${formatValue(result.title?.source)}`);
    console.log(`   Confidence: ${formatValue(result.title?.confidence)}\n`);

    // Price — the currency suffix is only shown when a price is present.
    console.log('💰 PRICE:');
    console.log(`   Nightly: ${formatValue(result.nightlyPrice?.value, ' EUR')}`);
    console.log(`   Total: ${formatValue(result.totalPrice?.value, ' EUR')}`);
    console.log(`   Status: ${formatValue(result.priceStatus)}\n`);

    // Location
    console.log('📍 LOCATION:');
    console.log(`   Text: ${formatValue(result.locationText?.value)}`);
    console.log(
      `   Lat/Lng: ${formatValue(result.latitude)}, ${formatValue(result.longitude)}\n`,
    );

    // Rating
    console.log('⭐ RATING:');
    console.log(`   Rating: ${formatValue(result.rating?.value)}`);
    console.log(`   Reviews: ${formatValue(result.reviewCount?.value)}\n`);

    // Capacity
    console.log('🏠 CAPACITY:');
    console.log(`   Guests: ${formatValue(result.guestCount?.value)}`);
    console.log(`   Bedrooms: ${formatValue(result.bedrooms?.value)}`);
    console.log(`   Beds: ${formatValue(result.beds?.value)}`);
    console.log(`   Bathrooms: ${formatValue(result.bathrooms?.value)}\n`);

    // Sleeping Options
    console.log('🛏️  SLEEPING OPTIONS:');
    if (result.sleepingOptions && result.sleepingOptions.length > 0) {
      result.sleepingOptions.forEach((opt, i) => {
        console.log(
          `   ${i + 1}. ${opt.quantity}x ${opt.bedType} (${opt.spotsPerUnit} spots, ${opt.quality})`,
        );
      });
      console.log(`   Max sleeping places: ${result.maxSleepingPlaces}`);
      console.log(`   Suitable for 4: ${result.suitableFor4 ? '✅ Yes' : '❌ No'}`);
      console.log(`   Quality: ${result.sleepingDataQuality}`);
    } else {
      console.log('   No sleeping options extracted');
    }
    console.log('');

    // Host
    console.log('👤 HOST:');
    console.log(`   Name: ${formatValue(result.hostName?.value)}\n`);

    // Images
    console.log('🖼️  IMAGES:');
    console.log(`   Count: ${result.images?.length ?? 0}`);
    if (result.images && result.images.length > 0) {
      console.log(`   First 3:`);
      result.images.slice(0, 3).forEach((img, i) => {
        console.log(`     ${i + 1}. ${truncate(img, 80)}`);
      });
    }
    console.log('');

    // Amenities
    console.log('✨ AMENITIES:');
    console.log(`   Count: ${result.amenities?.length ?? 0}`);
    if (result.amenities && result.amenities.length > 0) {
      console.log(`   First 10: ${result.amenities.slice(0, 10).join(', ')}`);
    }
    console.log('');

    // Description
    console.log('📝 DESCRIPTION:');
    console.log(`   ${truncate(formatValue(result.description?.value), 200)}\n`);

    // External ID
    console.log('🔗 EXTERNAL ID:');
    console.log(`   ${formatValue(result.externalId)}\n`);

    // Extraction Log
    console.log('📋 EXTRACTION LOG:');
    result.extractionLog?.forEach(log => {
      console.log(`   - ${log}`);
    });

    console.log(`\n${divider}`);
    console.log('TEST COMPLETE');
    console.log(divider);
  } catch (error) {
    console.log(`\n❌ Error after ${elapsedSeconds(startTime)}s:`);
    console.error(error);
  }
}
|
|
|
|
// Script entry point: run main() and log any rejection that escapes it.
main().catch(console.error);
|