airbnb-finder/test-scraper.ts
AI d9a203016f feat: massive Airbnb import pipeline overhaul + UI fixes
🔥 Scraper Improvements:
- Add JSON-LD price extraction (regression fix)
- Fix sleeping spotsPerUnit bug (was hardcoded to 2)
- Remove stale CSS selectors, add robust fallbacks
- Add JSON-LD price fallback in extraction pipeline
- Improve sleeping parser regex (lastIndex bug fix)
- Add 15+ new bed type patterns (murphy, day bed, hammock, plurals)
- Smarter deriveSleepingFromBeds() with mixed bed logic

📅 Import Form UX:
- Smart defaults (next weekend dates)
- Auto-calculate nights display
- URL param auto-detection (?check_in=&check_out=&adults=)
- Better visual hierarchy with icons
- Progress steps during import
- Success redirect to listing detail page

🗑️ Delete Button Fix:
- Add router.refresh() after successful delete
- Inline error state instead of alert()
- Admin delete button as proper client component

✏️ Edit/Admin Fixes:
- Fix revalidatePath using slug instead of id
- Fix redirect to detail page after edit
- Add cascade delete logic to admin deleteListing
- Extract delete to proper client component

🎨 UI States for Partial Data:
- Price: 'Preis auf Anfrage' with context hint
- Location: 'Ort nicht erkannt' instead of empty
- Sleeping: placeholder when no data
- Suitability: 3-state (yes/no/unknown)
- Use formatPrice/formatRating utilities

🛏️ Sleeping Data Quality:
- Add sleepingDataQuality to Prisma schema
- Save quality (EXACT/DERIVED/UNKNOWN) to DB
- Display '(geschätzt)' label for derived data

📊 Database:
- Restore corrupted schema.prisma from git
- Add sleepingDataQuality field
- Push schema changes

 TypeScript: Zero errors
 Build: Successful
2026-03-12 08:07:52 +00:00

128 lines
4.5 KiB
TypeScript

/**
* Test script for Puppeteer-based Airbnb scraper
* Run with: npx tsx test-scraper.ts
*/
import { scrapeAirbnbWithPuppeteer } from './src/lib/airbnb/puppeteer-scraper';
/** Listing the scraper is exercised against: a valid listing in Bad Bellingen, Germany. */
const TEST_URL = 'https://www.airbnb.com/rooms/52367822';
/** Placeholder printed for fields the scraper could not extract. */
const MISSING = 'N/A';

/**
 * Renders an extracted value for console output.
 *
 * Only `null`, `undefined` and the empty string count as missing, so
 * legitimate falsy values such as `0` (e.g. zero reviews) are printed as-is
 * instead of being replaced by the placeholder.
 */
function formatValue(value: unknown): string {
  if (value === null || value === undefined || value === '') {
    return MISSING;
  }
  return String(value);
}

/** Shortens `text` to `maxLength` characters, appending `...` only when something was cut. */
function truncate(text: string, maxLength: number): string {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Runs the scraper once against `TEST_URL` and prints every extracted field
 * to the console in a human-readable report.
 *
 * Failures never reject: a missing result or a thrown error is logged and the
 * process exit code is set to 1 so a calling shell or CI job can detect it.
 */
async function main(): Promise<void> {
  console.log('========================================');
  console.log('Airbnb Puppeteer Scraper Test');
  console.log('========================================\n');
  console.log(`Testing URL: ${TEST_URL}\n`);
  console.log('Starting scraper (this may take 30-60 seconds)...\n');

  const startTime = Date.now();

  try {
    const result = await scrapeAirbnbWithPuppeteer(TEST_URL);
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);

    // Check for a missing result before announcing success, otherwise the
    // output would claim completion and failure at the same time.
    if (!result) {
      console.log('❌ Result is null - scraping may have failed');
      process.exitCode = 1;
      return;
    }

    console.log(`\n✅ Scraping completed in ${elapsed}s\n`);

    console.log('========================================');
    console.log('EXTRACTED DATA');
    console.log('========================================\n');

    // Title
    console.log('📌 TITLE:');
    console.log(` Value: ${formatValue(result.title?.value)}`);
    console.log(` Source: ${formatValue(result.title?.source)}`);
    console.log(` Confidence: ${formatValue(result.title?.confidence)}\n`);

    // Price
    console.log('💰 PRICE:');
    console.log(` Nightly: ${formatValue(result.nightlyPrice?.value)} EUR`);
    console.log(` Total: ${formatValue(result.totalPrice?.value)} EUR`);
    console.log(` Status: ${formatValue(result.priceStatus)}\n`);

    // Location
    console.log('📍 LOCATION:');
    console.log(` Text: ${formatValue(result.locationText?.value)}`);
    console.log(` Lat/Lng: ${formatValue(result.latitude)}, ${formatValue(result.longitude)}\n`);

    // Rating
    console.log('⭐ RATING:');
    console.log(` Rating: ${formatValue(result.rating?.value)}`);
    console.log(` Reviews: ${formatValue(result.reviewCount?.value)}\n`);

    // Capacity
    console.log('🏠 CAPACITY:');
    console.log(` Guests: ${formatValue(result.guestCount?.value)}`);
    console.log(` Bedrooms: ${formatValue(result.bedrooms?.value)}`);
    console.log(` Beds: ${formatValue(result.beds?.value)}`);
    console.log(` Bathrooms: ${formatValue(result.bathrooms?.value)}\n`);

    // Sleeping Options
    console.log('🛏️ SLEEPING OPTIONS:');
    if (result.sleepingOptions && result.sleepingOptions.length > 0) {
      result.sleepingOptions.forEach((opt, i) => {
        console.log(` ${i + 1}. ${opt.quantity}x ${opt.bedType} (${opt.spotsPerUnit} spots, ${opt.quality})`);
      });
      console.log(` Max sleeping places: ${result.maxSleepingPlaces}`);
      console.log(` Suitable for 4: ${result.suitableFor4 ? '✅ Yes' : '❌ No'}`);
      console.log(` Quality: ${result.sleepingDataQuality}`);
    } else {
      console.log(' No sleeping options extracted');
    }
    console.log('');

    // Host
    console.log('👤 HOST:');
    console.log(` Name: ${formatValue(result.hostName?.value)}\n`);

    // Images
    console.log('🖼️ IMAGES:');
    console.log(` Count: ${result.images?.length ?? 0}`);
    if (result.images && result.images.length > 0) {
      console.log(` First 3:`);
      result.images.slice(0, 3).forEach((img, i) => {
        // Only mark the URL as shortened when it actually was.
        console.log(` ${i + 1}. ${truncate(img, 80)}`);
      });
    }
    console.log('');

    // Amenities
    console.log('✨ AMENITIES:');
    console.log(` Count: ${result.amenities?.length ?? 0}`);
    if (result.amenities && result.amenities.length > 0) {
      console.log(` First 10: ${result.amenities.slice(0, 10).join(', ')}`);
    }
    console.log('');

    // Description
    console.log('📝 DESCRIPTION:');
    console.log(` ${truncate(formatValue(result.description?.value), 200)}\n`);

    // External ID
    console.log('🔗 EXTERNAL ID:');
    console.log(` ${formatValue(result.externalId)}\n`);

    // Extraction Log
    console.log('📋 EXTRACTION LOG:');
    result.extractionLog?.forEach(log => {
      console.log(` - ${log}`);
    });

    console.log('\n========================================');
    console.log('TEST COMPLETE');
    console.log('========================================');
  } catch (error) {
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`\n❌ Error after ${elapsed}s:`);
    console.error(error);
    // Signal failure to the caller without cutting off pending output.
    process.exitCode = 1;
  }
}
// Script entry point: start the run and print any rejection that escapes main() to stderr.
main().catch((reason: unknown) => {
  console.error(reason);
});