airbnb-finder/test-scraper-debug.ts

97 lines
3.2 KiB
TypeScript
Raw Permalink Normal View History

feat: massive Airbnb import pipeline overhaul + UI fixes 🔥 Scraper Improvements: - Add JSON-LD price extraction (regression fix) - Fix sleeping spotsPerUnit bug (was hardcoded to 2) - Remove stale CSS selectors, add robust fallbacks - Add JSON-LD price fallback in extraction pipeline - Improve sleeping parser regex (lastIndex bug fix) - Add 15+ new bed type patterns (murphy, day bed, hammock, plurals) - Smarter deriveSleepingFromBeds() with mixed bed logic 📅 Import Form UX: - Smart defaults (next weekend dates) - Auto-calculate nights display - URL param auto-detection (?check_in=&check_out=&adults=) - Better visual hierarchy with icons - Progress steps during import - Success redirect to listing detail page 🗑️ Delete Button Fix: - Add router.refresh() after successful delete - Inline error state instead of alert() - Admin delete button as proper client component ✏️ Edit/Admin Fixes: - Fix revalidatePath using slug instead of id - Fix redirect to detail page after edit - Add cascade delete logic to admin deleteListing - Extract delete to proper client component 🎨 UI States for Partial Data: - Price: 'Preis auf Anfrage' with context hint - Location: 'Ort nicht erkannt' instead of empty - Sleeping: placeholder when no data - Suitability: 3-state (yes/no/unknown) - Use formatPrice/formatRating utilities 🛏️ Sleeping Data Quality: - Add sleepingDataQuality to Prisma schema - Save quality (EXACT/DERIVED/UNKNOWN) to DB - Display '(geschätzt)' label for derived data 📊 Database: - Restore corrupted schema.prisma from git - Add sleepingDataQuality field - Push schema changes ✅ TypeScript: Zero errors ✅ Build: Successful
2026-03-12 08:07:52 +00:00
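/**
 * Debug test for the Airbnb scraper: loads a single listing page in a headless
 * browser and logs redirects, the response status, final URL, page title,
 * visible text, the presence of key listing sections, and saves a screenshot.
 */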
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance at module load,
// so it is already in place when main() launches the browser.
puppeteer.use(StealthPlugin());
// Listing page that main() navigates to and inspects.
const TEST_URL = 'https://www.airbnb.com/rooms/842937876795894279';
/**
 * Opens TEST_URL in a headless browser and prints diagnostics to the console:
 * every 3xx response seen while loading, the main response status, final URL,
 * page title, HTML/body-text sizes, whether a few known listing selectors are
 * present, and finally writes `debug-screenshot.png`.
 *
 * Errors raised after the browser has launched are logged, not rethrown, and
 * the browser is always closed. A failure of `puppeteer.launch` itself still
 * rejects the returned promise.
 */
async function main(): Promise<void> {
  console.log('Starting debug test...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      '--window-size=1920,1080',
    ],
  });

  // Everything that uses the browser lives inside the try so that a failure in
  // page setup (newPage / setViewport / setUserAgent) cannot leak the process.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    console.log(`Navigating to: ${TEST_URL}`);

    // Track redirects
    page.on('response', (response) => {
      const status = response.status();
      if (status >= 300 && status < 400) {
        // Not every 3xx carries a Location header (e.g. 304 Not Modified),
        // so fall back to a readable placeholder instead of "undefined".
        const location = response.headers()['location'] ?? '(no location header)';
        console.log(`🔄 Redirect: ${status} → ${location.substring(0, 100)}`);
      }
    });

    const response = await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log(`\n📊 Response status: ${response?.status()}`);
    console.log(`📊 Final URL: ${page.url()}`);
    console.log(`📊 Page title: ${await page.title()}`);

    // Wait longer for dynamic content
    console.log('\n⏳ Waiting 5 seconds for dynamic content...');
    await new Promise<void>((resolve) => setTimeout(resolve, 5000));

    // Get page content
    const html = await page.content();
    console.log(`\n📄 HTML length: ${html.length} chars`);

    // Check for challenge page (plain substring heuristic; may over-report)
    if (html.includes('challenge') || html.includes('captcha') || html.includes('blocked')) {
      console.log('⚠️ Possible challenge/blocked page detected!');
    }

    // Check if we're on the homepage; the URL is re-read here because it may
    // have changed during the wait above.
    if (page.url() === 'https://www.airbnb.com/' || page.url() === 'https://www.airbnb.com') {
      console.log('⚠️ Redirected to homepage - likely blocked!');
    }

    // Extract visible text
    const bodyText = await page.evaluate(() => document.body.innerText);
    console.log(`\n📝 Body text length: ${bodyText.length} chars`);
    console.log(`\n📝 First 500 chars of visible text:\n${bodyText.substring(0, 500)}`);

    // Check for specific listing elements
    const hasListingTitle = await page.$('[data-plugin-in-point-id="TITLE_DEFAULT"]');
    const hasPhotos = await page.$('[data-section-id="PHOTO_PICKER"]');
    const hasPrice = await page.$('[data-plugin-in-point-id="PRICE_DEFAULT"]');

    console.log(`\n🔍 Listing elements found:`);
    console.log(` Title section: ${hasListingTitle ? '✅' : '❌'}`);
    console.log(` Photos section: ${hasPhotos ? '✅' : '❌'}`);
    console.log(` Price section: ${hasPrice ? '✅' : '❌'}`);

    // Take a screenshot of the current viewport only
    await page.screenshot({ path: 'debug-screenshot.png', fullPage: false });
    console.log(`\n📸 Screenshot saved to: debug-screenshot.png`);
  } catch (error) {
    console.error('Error:', error);
  } finally {
    await browser.close();
  }
}
// Run the script. main() can still reject (e.g. if the browser fails to
// launch), so handle that here instead of leaving a floating promise, and
// signal the failure through the process exit code.
main().catch((error: unknown) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});