airbnb-finder/test-scraper.ts

128 lines
4.5 KiB
TypeScript
Raw Normal View History

feat: massive Airbnb import pipeline overhaul + UI fixes 🔥 Scraper Improvements: - Add JSON-LD price extraction (regression fix) - Fix sleeping spotsPerUnit bug (was hardcoded to 2) - Remove stale CSS selectors, add robust fallbacks - Add JSON-LD price fallback in extraction pipeline - Improve sleeping parser regex (lastIndex bug fix) - Add 15+ new bed type patterns (murphy, day bed, hammock, plurals) - Smarter deriveSleepingFromBeds() with mixed bed logic 📅 Import Form UX: - Smart defaults (next weekend dates) - Auto-calculate nights display - URL param auto-detection (?check_in=&check_out=&adults=) - Better visual hierarchy with icons - Progress steps during import - Success redirect to listing detail page 🗑️ Delete Button Fix: - Add router.refresh() after successful delete - Inline error state instead of alert() - Admin delete button as proper client component ✏️ Edit/Admin Fixes: - Fix revalidatePath using slug instead of id - Fix redirect to detail page after edit - Add cascade delete logic to admin deleteListing - Extract delete to proper client component 🎨 UI States for Partial Data: - Price: 'Preis auf Anfrage' with context hint - Location: 'Ort nicht erkannt' instead of empty - Sleeping: placeholder when no data - Suitability: 3-state (yes/no/unknown) - Use formatPrice/formatRating utilities 🛏️ Sleeping Data Quality: - Add sleepingDataQuality to Prisma schema - Save quality (EXACT/DERIVED/UNKNOWN) to DB - Display '(geschätzt)' label for derived data 📊 Database: - Restore corrupted schema.prisma from git - Add sleepingDataQuality field - Push schema changes ✅ TypeScript: Zero errors ✅ Build: Successful
2026-03-12 08:07:52 +00:00
/**
* Test script for Puppeteer-based Airbnb scraper
* Run with: npx tsx test-scraper.ts
*/
import { scrapeAirbnbWithPuppeteer } from './src/lib/airbnb/puppeteer-scraper';
/**
 * Listing URL that the scraper is exercised against.
 * Valid listing in Bad Bellingen, Germany.
 */
const TEST_URL = 'https://www.airbnb.com/rooms/52367822';
/**
 * Renders a scraped field for console output.
 *
 * Returns 'N/A' only when the value is actually missing (null, undefined,
 * empty string or NaN). Unlike a `value || 'N/A'` fallback, legitimate falsy
 * values such as 0 (zero reviews, zero bedrooms) are printed as-is.
 */
function displayValue(value: unknown): string {
  const missing =
    value == null ||
    value === '' ||
    (typeof value === 'number' && Number.isNaN(value));
  return missing ? 'N/A' : String(value);
}

/**
 * Shortens text to at most `maxLength` characters, appending '...' only when
 * something was actually cut off.
 */
function truncate(text: string, maxLength: number): string {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Runs the scraper once against TEST_URL and prints every extracted field
 * group (title, price, location, rating, capacity, sleeping options, host,
 * images, amenities, description, external id, extraction log) to the console.
 *
 * Errors thrown by the scraper are caught and logged here, so the returned
 * promise does not reject because of them. When the scraper returns no result
 * or throws, `process.exitCode` is set to 1 so that shells and CI can detect
 * the failure.
 */
async function main(): Promise<void> {
  console.log('========================================');
  console.log('Airbnb Puppeteer Scraper Test');
  console.log('========================================\n');
  console.log(`Testing URL: ${TEST_URL}\n`);
  console.log('Starting scraper (this may take 30-60 seconds)...\n');

  const startTime = Date.now();

  try {
    const result = await scrapeAirbnbWithPuppeteer(TEST_URL);
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`\n✅ Scraping completed in ${elapsed}s\n`);

    if (!result) {
      console.log('❌ Result is null - scraping may have failed');
      // A missing result is a failed run; report it through the exit status.
      process.exitCode = 1;
      return;
    }

    console.log('========================================');
    console.log('EXTRACTED DATA');
    console.log('========================================\n');

    // Title
    console.log('📌 TITLE:');
    console.log(` Value: ${displayValue(result.title?.value)}`);
    console.log(` Source: ${displayValue(result.title?.source)}`);
    console.log(` Confidence: ${displayValue(result.title?.confidence)}\n`);

    // Price
    console.log('💰 PRICE:');
    console.log(` Nightly: ${displayValue(result.nightlyPrice?.value)} EUR`);
    console.log(` Total: ${displayValue(result.totalPrice?.value)} EUR`);
    console.log(` Status: ${displayValue(result.priceStatus)}\n`);

    // Location
    console.log('📍 LOCATION:');
    console.log(` Text: ${displayValue(result.locationText?.value)}`);
    // Missing coordinates are shown as N/A instead of the literal "undefined"/"null".
    console.log(` Lat/Lng: ${displayValue(result.latitude)}, ${displayValue(result.longitude)}\n`);

    // Rating
    console.log('⭐ RATING:');
    console.log(` Rating: ${displayValue(result.rating?.value)}`);
    console.log(` Reviews: ${displayValue(result.reviewCount?.value)}\n`);

    // Capacity
    console.log('🏠 CAPACITY:');
    console.log(` Guests: ${displayValue(result.guestCount?.value)}`);
    console.log(` Bedrooms: ${displayValue(result.bedrooms?.value)}`);
    console.log(` Beds: ${displayValue(result.beds?.value)}`);
    console.log(` Bathrooms: ${displayValue(result.bathrooms?.value)}\n`);

    // Sleeping Options
    console.log('🛏️ SLEEPING OPTIONS:');
    if (result.sleepingOptions && result.sleepingOptions.length > 0) {
      result.sleepingOptions.forEach((opt, i) => {
        console.log(` ${i + 1}. ${opt.quantity}x ${opt.bedType} (${opt.spotsPerUnit} spots, ${opt.quality})`);
      });
      console.log(` Max sleeping places: ${result.maxSleepingPlaces}`);
      console.log(` Suitable for 4: ${result.suitableFor4 ? '✅ Yes' : '❌ No'}`);
      console.log(` Quality: ${result.sleepingDataQuality}`);
    } else {
      console.log(' No sleeping options extracted');
    }
    console.log('');

    // Host
    console.log('👤 HOST:');
    console.log(` Name: ${displayValue(result.hostName?.value)}\n`);

    // Images
    console.log('🖼️ IMAGES:');
    console.log(` Count: ${result.images?.length ?? 0}`);
    if (result.images && result.images.length > 0) {
      console.log(` First 3:`);
      result.images.slice(0, 3).forEach((img, i) => {
        // Only URLs longer than 80 characters get the ellipsis.
        console.log(` ${i + 1}. ${truncate(img, 80)}`);
      });
    }
    console.log('');

    // Amenities
    console.log('✨ AMENITIES:');
    console.log(` Count: ${result.amenities?.length ?? 0}`);
    if (result.amenities && result.amenities.length > 0) {
      console.log(` First 10: ${result.amenities.slice(0, 10).join(', ')}`);
    }
    console.log('');

    // Description
    console.log('📝 DESCRIPTION:');
    console.log(` ${truncate(displayValue(result.description?.value), 200)}\n`);

    // External ID
    console.log('🔗 EXTERNAL ID:');
    console.log(` ${displayValue(result.externalId)}\n`);

    // Extraction Log
    console.log('📋 EXTRACTION LOG:');
    result.extractionLog?.forEach((entry) => {
      console.log(` - ${entry}`);
    });

    console.log('\n========================================');
    console.log('TEST COMPLETE');
    console.log('========================================');
  } catch (error: unknown) {
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`\n❌ Error after ${elapsed}s:`);
    console.error(error);
    // The error is handled here, so signal the failure through the exit status
    // rather than letting the process finish with 0.
    process.exitCode = 1;
  }
}
// Script entry point: start the test run and print any rejection that
// escapes main() to stderr.
void main().catch((reason: unknown) => {
  console.error(reason);
});