diff --git a/debug-screenshot.png b/debug-screenshot.png new file mode 100644 index 0000000..5140523 Binary files /dev/null and b/debug-screenshot.png differ diff --git a/prisma/dev.db b/prisma/dev.db index e69de29..03f9fda 100644 Binary files a/prisma/dev.db and b/prisma/dev.db differ diff --git a/prisma/prisma/dev.db b/prisma/dev.db.corrupted similarity index 83% rename from prisma/prisma/dev.db rename to prisma/dev.db.corrupted index 2dc33c9..167c71f 100644 Binary files a/prisma/prisma/dev.db and b/prisma/dev.db.corrupted differ diff --git a/prisma/schema.prisma b/prisma/schema.prisma index dd081c1..a2feb51 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -40,10 +40,13 @@ model Listing { // Capacity guestCount Int? @map("guest_count") officialGuestCount Int? @map("official_guest_count") + + // Sleeping Analysis maxSleepingPlaces Int? @map("max_sleeping_places") suitableFor4 Boolean? @map("suitable_for_4") extraMattressesNeededFor4 Int? @map("extra_mattresses_needed_for_4") bedTypesSummary String? @map("bed_types_summary") + sleepingDataQuality String? @map("sleeping_data_quality") // EXACT, DERIVED, UNKNOWN // Room Details bedrooms Int? diff --git a/src/actions/import-listing.ts b/src/actions/import-listing.ts index b94fee3..3e6828a 100644 --- a/src/actions/import-listing.ts +++ b/src/actions/import-listing.ts @@ -73,6 +73,7 @@ export async function importListingAction(formData: FormData) { let suitableFor4 = scrapedData?.suitableFor4 || null; let extraMattressesNeededFor4 = scrapedData?.extraMattressesNeededFor4 || null; let bedTypesSummary = null; + let sleepingDataQuality = scrapedData?.sleepingDataQuality || 'UNKNOWN'; if (scrapedData?.sleepingOptions && scrapedData.sleepingOptions.length > 0) { const types = scrapedData.sleepingOptions.map(o => `${o.quantity}× ${o.bedType}`); @@ -110,6 +111,7 @@ export async function importListingAction(formData: FormData) { suitableFor4, extraMattressesNeededFor4, bedTypesSummary, + sleepingDataQuality, // Room Details bedrooms: scrapedData?.bedrooms?.value || null, diff --git a/src/app/(protected)/admin/import/import-form.tsx b/src/app/(protected)/admin/import/import-form.tsx index 330aeac..cefc469 100644 --- a/src/app/(protected)/admin/import/import-form.tsx +++ b/src/app/(protected)/admin/import/import-form.tsx @@ -1,26 +1,86 @@ "use client"; import { useState } from "react"; +import { useRouter } from "next/navigation"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { importListingAction } from "@/actions/import-listing"; +// Calculate next weekend (Friday → Sunday) +function getNextWeekend(): { checkIn: string; checkOut: string } { + const now = new Date(); + const dayOfWeek = now.getDay(); // 0=Sun, 5=Fri, 6=Sat + const daysUntilFriday = dayOfWeek <= 5 ? 5 - dayOfWeek : 7 - dayOfWeek + 5; + const friday = new Date(now); + friday.setDate(now.getDate() + daysUntilFriday); + const sunday = new Date(friday); + sunday.setDate(friday.getDate() + 2); + return { + checkIn: friday.toISOString().split("T")[0], + checkOut: sunday.toISOString().split("T")[0], + }; +} + +// Extract Airbnb URL params +function extractParamsFromUrl(url: string): { + checkIn: string; + checkOut: string; + adults: string; +} | null { + try { + const u = new URL(url); + return { + checkIn: u.searchParams.get("check_in") || "", + checkOut: u.searchParams.get("check_out") || "", + adults: u.searchParams.get("adults") || "", + }; + } catch { + return null; + } +} + export function ImportForm() { + const router = useRouter(); + const weekend = getNextWeekend(); + const [url, setUrl] = useState(""); - const [checkIn, setCheckIn] = useState(""); - const [checkOut, setCheckOut] = useState(""); + const [checkIn, setCheckIn] = useState(weekend.checkIn); + const [checkOut, setCheckOut] = useState(weekend.checkOut); const [adults, setAdults] = useState("4"); const [error, setError] = useState(""); - const [success, setSuccess] = useState(false); const [isLoading, setIsLoading] = useState(false); + const [progress, setProgress] = useState(""); + + const hasDates = checkIn && checkOut; + const nights = hasDates + ? Math.max( + 1, + Math.round( + (new Date(checkOut).getTime() - new Date(checkIn).getTime()) / + (1000 * 60 * 60 * 24) + ) + ) + : null; + + // Auto-extract params when URL changes + const handleUrlChange = (e: React.ChangeEvent) => { + const newUrl = e.target.value; + setUrl(newUrl); + const params = extractParamsFromUrl(newUrl); + if (params) { + if (params.checkIn) setCheckIn(params.checkIn); + if (params.checkOut) setCheckOut(params.checkOut); + if (params.adults) setAdults(params.adults); + } + }; const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); setError(""); - setSuccess(false); setIsLoading(true); + setProgress("🔍 Scraping Airbnb-Seite..."); const formData = new FormData(); formData.append("airbnbUrl", url); @@ -28,20 +88,29 @@ export function ImportForm() { if (checkOut) formData.append("checkOut", checkOut); if (adults) formData.append("adults", adults); + // Progress steps + const t1 = setTimeout(() => setProgress("📊 Extrahiere Daten..."), 2000); + const t2 = setTimeout(() => setProgress("💾 Speichere in Datenbank..."), 5000); + const result = await importListingAction(formData); - if (result.ok) { - setSuccess(true); - setUrl(""); + clearTimeout(t1); + clearTimeout(t2); + + if (result.ok && result.slug) { + setProgress("✅ Fertig! Weiterleitung..."); + setTimeout(() => router.push(`/listings/${result.slug}`), 500); + return; } else if (result.error) { setError(result.error); } setIsLoading(false); + setProgress(""); }; // Get today's date for min date - const today = new Date().toISOString().split('T')[0]; + const today = new Date().toISOString().split("T")[0]; return ( @@ -49,70 +118,148 @@ export function ImportForm() { 🏠 Neues Airbnb importieren -
- {/* URL Field */} + + {/* URL Field - Prominent */}
- + setUrl(e.target.value)} + onChange={handleUrlChange} required autoFocus + className="text-lg h-12" + disabled={isLoading} /> +

+ Einfach den Airbnb-Link einfügen — Reisedaten werden automatisch + erkannt falls in der URL enthalten. +

- {/* Trip Context Fields */} -
- -
-
- + {/* Trip Context Fields - Grouped */} +
+ + {hasDates ? "✅" : "⚠️"} Reisedaten{" "} + + (optional — für bessere Preise) + + + +
+ {/* Check-in */} +
+ setCheckIn(e.target.value)} min={today} - placeholder="Datum" />
-
- + + {/* Nights Display */} +
+ {nights != null ? ( + + {nights} {nights === 1 ? "Nacht" : "Nächte"} + + ) : ( + + )} +
+ + {/* Check-out */} +
+ setCheckOut(e.target.value)} min={checkIn || today} - placeholder="Datum" - /> -
-
- - setAdults(e.target.value)} />
-

- 💡 Mit Reisedaten kann der Preis genauer ermittelt werden. - Die Daten werden auch aus der URL extrahiert wenn vorhanden. -

-
- {error &&
{error}
} - {success &&
✓ Erfolgreich importiert!
} - -
+ + {/* Error */} + {error && ( +
+ ❌ {error} +
+ )} + + {/* Loading Progress */} + {isLoading && progress && ( +
+ + {progress} +
+ )} + + {/* Submit Button */} + diff --git a/src/app/(protected)/admin/listings/[slug]/delete-button.tsx b/src/app/(protected)/admin/listings/[slug]/delete-button.tsx new file mode 100644 index 0000000..d19bb42 --- /dev/null +++ b/src/app/(protected)/admin/listings/[slug]/delete-button.tsx @@ -0,0 +1,49 @@ +"use client"; + +import { useState } from "react"; +import { useRouter } from "next/navigation"; +import { Button } from "@/components/ui/button"; +import { deleteListing } from "../actions"; + +interface DeleteListingButtonProps { + listingId: string; + listingTitle: string; +} + +export function DeleteListingButton({ listingId, listingTitle }: DeleteListingButtonProps) { + const [isDeleting, setIsDeleting] = useState(false); + const [error, setError] = useState(null); + const router = useRouter(); + + const handleDelete = async () => { + if (!confirm(`"${listingTitle}" wirklich löschen?`)) return; + + setError(null); + setIsDeleting(true); + try { + const formData = new FormData(); + formData.append("id", listingId); + await deleteListing(formData); + router.push("/listings"); + } catch (err) { + setError(err instanceof Error ? err.message : "Fehler beim Löschen"); + setIsDeleting(false); + } + }; + + return ( +
+ + {error && ( +

{error}

+ )} +
+ ); +} diff --git a/src/app/(protected)/admin/listings/[slug]/page.tsx b/src/app/(protected)/admin/listings/[slug]/page.tsx index 5965e67..edbebef 100644 --- a/src/app/(protected)/admin/listings/[slug]/page.tsx +++ b/src/app/(protected)/admin/listings/[slug]/page.tsx @@ -4,7 +4,8 @@ import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; -import { updateListing, deleteListing, addNote, addTagToListing, removeTagFromListing } from "../actions"; +import { updateListing, addNote, addTagToListing, removeTagFromListing } from "../actions"; +import { DeleteListingButton } from "./delete-button"; // Note: actions.ts is in /admin/listings/, so from [slug]/ we go up one level with ../ export default async function EditListingPage({ @@ -213,21 +214,12 @@ export default async function EditListingPage({
-
- - -
+
+ +
diff --git a/src/app/(protected)/admin/listings/actions.ts b/src/app/(protected)/admin/listings/actions.ts index a530ef1..7a52cd1 100644 --- a/src/app/(protected)/admin/listings/actions.ts +++ b/src/app/(protected)/admin/listings/actions.ts @@ -21,6 +21,12 @@ export async function updateListing(formData: FormData) { const status = formData.get("status") as string; const isFavorite = formData.get("isFavorite") === "true"; + // Fetch slug before update for revalidatePath and redirect + const existing = await prisma.listing.findUnique({ + where: { id }, + select: { slug: true }, + }); + await prisma.listing.update({ where: { id }, data: { @@ -41,14 +47,34 @@ export async function updateListing(formData: FormData) { }, }); + const slug = existing?.slug; revalidatePath("/listings"); - revalidatePath(`/listings/${id}`); - redirect(`/listings`); + if (slug) { + revalidatePath(`/listings/${slug}`); + } + redirect(`/listings/${slug ?? ""}`); } export async function deleteListing(formData: FormData) { const id = formData.get("id") as string; + // Delete related records first to avoid foreign key constraint errors + await prisma.listingTag.deleteMany({ + where: { listingId: id }, + }); + + await prisma.listingSleepingOption.deleteMany({ + where: { listingId: id }, + }); + + await prisma.listingImage.deleteMany({ + where: { listingId: id }, + }); + + await prisma.adminNote.deleteMany({ + where: { listingId: id }, + }); + await prisma.listing.delete({ where: { id }, }); diff --git a/src/app/(protected)/listings/[slug]/page.tsx b/src/app/(protected)/listings/[slug]/page.tsx index 34ae714..fb6f49f 100644 --- a/src/app/(protected)/listings/[slug]/page.tsx +++ b/src/app/(protected)/listings/[slug]/page.tsx @@ -114,7 +114,7 @@ export default async function ListingDetailPage({ params }: PageProps) {
- {listing.sleepingOptions.length > 0 && ( + {listing.sleepingOptions.length > 0 ? (

Schlafmöglichkeiten

@@ -128,6 +128,10 @@ export default async function ListingDetailPage({ params }: PageProps) { ))}
+ ) : ( +

+ ⚠️ Schlafplatzdetails nicht erkannt +

)}
@@ -158,11 +162,24 @@ export default async function ListingDetailPage({ params }: PageProps) {

{listing.title}

-

📍 {listing.locationText || "Ort unbekannt"}

+

📍 {listing.locationText || "Ort nicht erkannt"}

- {formatPrice(listing.nightlyPrice)} - / Nacht + {listing.nightlyPrice != null ? ( + <> + {formatPrice(listing.nightlyPrice)} + / Nacht + + ) : ( +
+ Preis auf Anfrage +

+ {listing.priceStatus === 'REQUIRES_TRIP_CONTEXT' + ? '💡 Mit Reisedaten ermittelbar' + : 'Nicht ermittelbar'} +

+
+ )}
diff --git a/src/app/(protected)/listings/delete-button.tsx b/src/app/(protected)/listings/delete-button.tsx index ede5567..33ad4b1 100644 --- a/src/app/(protected)/listings/delete-button.tsx +++ b/src/app/(protected)/listings/delete-button.tsx @@ -1,6 +1,7 @@ "use client"; import { useState } from "react"; +import { useRouter } from "next/navigation"; import { Button } from "@/components/ui/button"; import { deleteListing } from "./actions"; @@ -11,30 +12,39 @@ interface DeleteListingButtonProps { export function DeleteListingButton({ listingId, listingTitle }: DeleteListingButtonProps) { const [isDeleting, setIsDeleting] = useState(false); + const [error, setError] = useState(null); + const router = useRouter(); const handleDelete = async () => { if (!confirm(`"${listingTitle}" wirklich löschen?`)) return; - + + setError(null); setIsDeleting(true); try { const formData = new FormData(); formData.append("id", listingId); await deleteListing(formData); - } catch (error) { - alert("Fehler beim Löschen: " + (error as Error).message); + router.refresh(); + } catch (err) { + setError(err instanceof Error ? err.message : "Fehler beim Löschen"); setIsDeleting(false); } }; return ( - +
+ + {error && ( +

{error}

+ )} +
); } diff --git a/src/app/(protected)/listings/page.tsx b/src/app/(protected)/listings/page.tsx index a69c29b..834ccd6 100644 --- a/src/app/(protected)/listings/page.tsx +++ b/src/app/(protected)/listings/page.tsx @@ -2,6 +2,7 @@ import { prisma } from "@/lib/prisma"; import { Card, CardContent } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { formatPrice, formatRating } from "@/lib/utils"; import Link from "next/link"; import { DeleteListingButton } from "./delete-button"; @@ -57,8 +58,12 @@ export default async function ListingsPage() { {/* Price & Rating */}
- €{listing.nightlyPrice?.toFixed(2) || "—"} - ⭐ {listing.rating?.toFixed(2) || "—"} + {listing.nightlyPrice != null ? ( + {formatPrice(listing.nightlyPrice)} + ) : ( + Preis auf Anfrage + )} + ⭐ {formatRating(listing.rating)}
{/* Tags */} @@ -77,12 +82,14 @@ export default async function ListingsPage() { )} {/* Sleep Info */} - {listing.suitableFor4 ? ( + {listing.suitableFor4 === true ? (

✅ Geeignet für 4 Personen

- ) : ( + ) : listing.suitableFor4 === false ? (

⚠️ Nicht ideal für 4 {listing.extraMattressesNeededFor4 ? `(+${listing.extraMattressesNeededFor4} Matratzen)` : ""}

+ ) : ( +

❓ Schlafplatz-Info unbekannt

)} {/* Actions */} diff --git a/src/lib/airbnb/index.ts b/src/lib/airbnb/index.ts index 494c2c5..a520533 100644 --- a/src/lib/airbnb/index.ts +++ b/src/lib/airbnb/index.ts @@ -1,22 +1,8 @@ -import * as cheerio from "cheerio"; +import { scrapeAirbnbWithPuppeteer } from "./puppeteer-scraper"; import { normalizeAirbnbUrlWithContext } from "./url-normalizer"; -import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText, parseTitle } from "./parsers/text-patterns"; -import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds } from "./parsers/sleeping"; -import { extractPrice } from "./parsers/price"; -import { extractLocation } from "./parsers/location"; -import { parseJsonLd } from "./parsers/jsonld"; -import { - ExtractedListing, - FieldSource, - field, - mergeField, - TripContext, - SleepingDataQuality, - PriceStatus -} from "./types"; // ============================================ -// Main Scraper Function +// Main Scraper Function - Uses Puppeteer for JS rendering // ============================================ export async function scrapeAirbnbListing( @@ -24,156 +10,27 @@ export async function scrapeAirbnbListing( options?: { tripContext?: TripContext; usePlaywright?: boolean } ): Promise { try { - // Step 1: Normalize URL and extract trip context + // Normalize URL and extract trip context const normalized = normalizeAirbnbUrlWithContext(url); // Merge trip context from options with URL-extracted context - const tripContext: TripContext = { + const tripContext = { checkIn: options?.tripContext?.checkIn || normalized.tripContext.checkIn, checkOut: options?.tripContext?.checkOut || normalized.tripContext.checkOut, adults: options?.tripContext?.adults || normalized.tripContext.adults || 4, }; - - // Step 2: Fetch HTML - const html = await fetchHtml(normalized.normalized); - const $ = cheerio.load(html); - // Step 3: Extract visible text for pattern matching - const visibleText = extractVisibleText(html); + // Use Puppeteer to render JavaScript and extract data + const result = await scrapeAirbnbWithPuppeteer(normalized.normalized, { tripContext }); - // Step 4: Run all parsers - const jsonldData = parseJsonLd($); - const capacityFacts = parseCapacityFacts(visibleText); - const ratingFacts = parseRating(visibleText); - const hostName = parseHost(visibleText); - const maxGuests = parseMaxGuests(visibleText); - const sleepingOptions = parseSleepingArrangements(visibleText); - const priceData = extractPrice(html, $, tripContext); - const locationData = extractLocation($, html); - const pageTitle = parseTitle(html); - - // Step 5: Build the result with priority: jsonld > text_pattern > derived - const result: ExtractedListing = { - // URLs - originalUrl: normalized.original, - normalizedUrl: normalized.normalized, - externalId: normalized.externalId, - - // Basic Info - title: mergeField( - jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null, - pageTitle ? field(pageTitle, 'text_pattern', 'medium') : field(null, 'derived', 'low') - ), - description: mergeField( - jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null, - field(null, 'derived', 'low') - ), - - // Location - locationText: locationData.locationText, - latitude: mergeField( - jsonldData.latitude ? field(jsonldData.latitude, 'jsonld', 'high') : null, - locationData.latitude.value !== null ? locationData.latitude : field(null, 'derived', 'low') - ), - longitude: mergeField( - jsonldData.longitude ? field(jsonldData.longitude, 'jsonld', 'high') : null, - locationData.longitude.value !== null ? locationData.longitude : field(null, 'derived', 'low') - ), - - // Pricing - tripContext, - nightlyPrice: priceData.nightly, - totalPrice: priceData.total, - priceStatus: priceData.status, - - // Rating - rating: mergeField( - ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null, - jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null - ), - reviewCount: mergeField( - ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null, - jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null - ), - - // Capacity - guestCount: mergeField( - capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null, - field(null, 'derived', 'low') - ), - officialGuestCount: mergeField( - maxGuests ? field(maxGuests, 'text_pattern', 'high') : null, - field(null, 'derived', 'low') - ), - bedrooms: mergeField( - capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null, - field(null, 'derived', 'low') - ), - beds: mergeField( - capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null, - field(null, 'derived', 'low') - ), - bathrooms: mergeField( - capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null, - field(null, 'derived', 'low') - ), - - // Sleeping - sleepingOptions, - maxSleepingPlaces: 0, - suitableFor4: false, - extraMattressesNeededFor4: 0, - sleepingDataQuality: 'UNKNOWN', - - // Host - hostName: mergeField( - hostName ? field(hostName, 'text_pattern', 'high') : null, - jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null - ), - - // Amenities - amenities: jsonldData.amenities || [], - - // Images - images: jsonldData.images || [], - coverImage: jsonldData.images?.[0] || null, - - // Other - cancellationPolicy: field(null, 'derived', 'low'), - - // Debug - rawSnippets: { - title: jsonldData.title || '', - visibleText: visibleText.substring(0, 2000), - }, - extractionLog: [ - `URL normalized: ${normalized.normalized}`, - `External ID: ${normalized.externalId}`, - `Trip context: ${JSON.stringify(tripContext)}`, - `Capacity facts: ${capacityFacts ? JSON.stringify(capacityFacts) : 'none'}`, - `Rating facts: ${ratingFacts ? JSON.stringify(ratingFacts) : 'none'}`, - `Sleeping options: ${sleepingOptions.length} found`, - ], - }; - - // Step 6: Calculate sleeping stats - if (sleepingOptions.length > 0) { - const stats = calculateSleepingStats(sleepingOptions); - result.maxSleepingPlaces = stats.maxSleepingPlaces; - result.suitableFor4 = stats.suitableFor4; - result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4; - result.sleepingDataQuality = 'EXACT'; - } else if (result.beds.value && result.guestCount.value) { - // Derive from beds and guest count - const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value); - const stats = calculateSleepingStats(derivedOptions); - result.sleepingOptions = derivedOptions; - result.maxSleepingPlaces = stats.maxSleepingPlaces; - result.suitableFor4 = stats.suitableFor4; - result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4; - result.sleepingDataQuality = 'DERIVED'; + if (result) { + // Update URLs with normalized values + result.originalUrl = normalized.original; + result.normalizedUrl = normalized.normalized; + result.externalId = normalized.externalId; + result.tripContext = tripContext; } - + return result; } catch (error) { console.error("Scraping failed:", error); @@ -181,36 +38,9 @@ export async function scrapeAirbnbListing( } } -// ============================================ -// HTML Fetcher - with better error handling and logging -// ============================================ - -async function fetchHtml(url: string): Promise { - const response = await fetch(url, { - headers: { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7", - "Accept-Encoding": "gzip, deflate, br", - "Cache-Control": "no-cache", - "Upgrade-Insecure-Requests": "1", - }, - }); - - if (!response.ok) { - throw new Error(`HTTP ${response.status} for ${url}`); - } - - const html = await response.text(); - - // Log some debug info - console.log(`[Scraper] Fetched ${url.length} chars`); - console.log(`[Scraper] Contains 'application/ld+json': ${html.includes('application/ld+json')}`); - console.log(`[Scraper] Contains 'airbnb': ${html.toLowerCase().includes('airbnb')}`); - - return html; -} - // Re-export utilities for backward compatibility export { normalizeAirbnbUrlWithContext as normalizeAirbnbUrl } from "./url-normalizer"; export { extractAirbnbExternalId } from "./url-normalizer"; + +// Need to import TripContext for TypeScript +import type { TripContext, ExtractedListing } from "./types"; diff --git a/src/lib/airbnb/parsers/jsonld.ts b/src/lib/airbnb/parsers/jsonld.ts index 1cf75b4..dcbeb63 100644 --- a/src/lib/airbnb/parsers/jsonld.ts +++ b/src/lib/airbnb/parsers/jsonld.ts @@ -12,6 +12,7 @@ export interface JsonLdData { cancellationPolicy: string | null; hostName: string | null; amenities: string[]; + price: number | null; } /** @@ -31,6 +32,7 @@ export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData { cancellationPolicy: null, hostName: null, amenities: [], + price: null, }; const jsonLdScript = $('script[type="application/ld+json"]').html(); @@ -117,6 +119,15 @@ export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData { .filter(Boolean); } + // Price - extract from makesOffer.offers[0].price or offers.price + const priceValue = jsonData.makesOffer?.offers?.[0]?.price ?? jsonData.offers?.price; + if (priceValue !== undefined && priceValue !== null) { + const parsed = typeof priceValue === 'number' ? priceValue : parseFloat(String(priceValue)); + if (!isNaN(parsed)) { + result.price = parsed; + } + } + } catch (error) { console.error('Failed to parse JSON-LD:', error); } diff --git a/src/lib/airbnb/parsers/price.ts b/src/lib/airbnb/parsers/price.ts index 9d81e27..6ff3442 100644 --- a/src/lib/airbnb/parsers/price.ts +++ b/src/lib/airbnb/parsers/price.ts @@ -9,11 +9,12 @@ function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number | // Try various price selectors that Airbnb might use const priceSelectors = [ '[data-testid="price-amount"]', + '[data-testid="book-it-default"] span', 'span[class*="Price"]', 'span[class*="price"]', '[itemprop="price"]', - '._1y6k3r2', - '._1dss1omb', + 'div[class*="bookit"] span', + 'section[class*="booking"] span', ]; for (const selector of priceSelectors) { @@ -33,6 +34,16 @@ function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number | return priceFromHtml; } + // Fallback: look for "total" near price numbers + const totalPattern = /total[^€$£]*[€$£]\s*(\d[\d.,]*)/i; + const totalMatch = html.match(totalPattern); + if (totalMatch) { + const parsed = parseFloat(totalMatch[1].replace(/[.,](?=\d{3})/g, '').replace(',', '.')); + if (!isNaN(parsed) && parsed > 0) { + return parsed; + } + } + return null; } diff --git a/src/lib/airbnb/parsers/sleeping.ts b/src/lib/airbnb/parsers/sleeping.ts index 87175a1..d59d7bd 100644 --- a/src/lib/airbnb/parsers/sleeping.ts +++ b/src/lib/airbnb/parsers/sleeping.ts @@ -2,8 +2,14 @@ import { BedType, SleepingOption } from '../types'; /** * Bed type configuration: maps text patterns to bed types, spots per unit, and quality + * + * IMPORTANT: Longer/more specific patterns MUST come before shorter ones + * (e.g., "bunk bed" before "bed", "double bed" before "double") */ export const BED_TYPE_CONFIG: Record = { + // Compound bed types (must come first to avoid partial matches) + 'bunk bed': { type: 'BUNK', spots: 2, quality: 'FULL' }, + 'etagenbett': { type: 'BUNK', spots: 2, quality: 'FULL' }, 'double bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' }, 'doppelbett': { type: 'DOUBLE', spots: 2, quality: 'FULL' }, 'queen bed': { type: 'QUEEN', spots: 2, quality: 'FULL' }, @@ -11,11 +17,27 @@ export const BED_TYPE_CONFIG: Record= 2 && guestCount >= beds * 1.5 → mix of double/single (assume mostly double) + * - beds === 1 && guestCount >= 2 → double + * - beds === 1 && guestCount === 1 → single + * - beds >= 2 && guestCount < beds * 1.5 → mostly single */ export function deriveSleepingFromBeds(beds: number, guestCount: number): SleepingOption[] { if (!beds || beds < 1) return []; - // Assume beds are double beds if guest count suggests it - const avgGuestsPerBed = guestCount ? guestCount / beds : 2; + const options: SleepingOption[] = []; - if (avgGuestsPerBed >= 1.5) { - // Likely double beds - return [{ - bedType: 'DOUBLE', - quantity: beds, - spotsPerUnit: 2, - quality: 'FULL', - label: 'double bed (derived)', - }]; - } else { - // Likely single beds - return [{ - bedType: 'SINGLE', - quantity: beds, - spotsPerUnit: 1, - quality: 'FULL', - label: 'single bed (derived)', - }]; + if (beds === 1) { + // Single bed scenario + if (guestCount >= 2) { + // 1 bed for 2+ guests → must be double + options.push({ + bedType: 'DOUBLE', + quantity: 1, + spotsPerUnit: 2, + quality: 'FULL', + label: 'Doppelbett (abgeleitet)', + }); + } else { + // 1 bed for 1 guest → single + options.push({ + bedType: 'SINGLE', + quantity: 1, + spotsPerUnit: 1, + quality: 'FULL', + label: 'Einzelbett (abgeleitet)', + }); + } + } else if (beds >= 2) { + // Multiple beds + const avgGuestsPerBed = guestCount ? guestCount / beds : 2; + + if (avgGuestsPerBed >= 1.5) { + // High guest-to-bed ratio → mix of double and single + // Assume roughly half are double, half single + const doubleCount = Math.ceil(beds / 2); + const singleCount = beds - doubleCount; + + if (doubleCount > 0) { + options.push({ + bedType: 'DOUBLE', + quantity: doubleCount, + spotsPerUnit: 2, + quality: 'FULL', + label: 'Doppelbett (abgeleitet)', + }); + } + if (singleCount > 0) { + options.push({ + bedType: 'SINGLE', + quantity: singleCount, + spotsPerUnit: 1, + quality: 'FULL', + label: 'Einzelbett (abgeleitet)', + }); + } + } else { + // Low guest-to-bed ratio → mostly single beds + options.push({ + bedType: 'SINGLE', + quantity: beds, + spotsPerUnit: 1, + quality: 'FULL', + label: 'Einzelbett (abgeleitet)', + }); + } } + + return options; } diff --git a/src/lib/airbnb/puppeteer-scraper.ts b/src/lib/airbnb/puppeteer-scraper.ts new file mode 100644 index 0000000..1be018b --- /dev/null +++ b/src/lib/airbnb/puppeteer-scraper.ts @@ -0,0 +1,419 @@ +import puppeteer from 'puppeteer-extra'; +import StealthPlugin from 'puppeteer-extra-plugin-stealth'; +import type { Browser, Page } from 'puppeteer'; +import * as cheerio from 'cheerio'; +import { + ExtractedListing, + FieldSource, + field, + mergeField, + TripContext, + PriceStatus, + SleepingDataQuality +} from './types'; +import { parseJsonLd } from './parsers/jsonld'; +import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText, parseTitle } from './parsers/text-patterns'; +import { extractLocation } from './parsers/location'; +import { extractPrice } from './parsers/price'; +import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds, BED_TYPE_CONFIG } from './parsers/sleeping'; + +// Enable stealth mode +import Stealth from 'puppeteer-extra-plugin-stealth'; +puppeteer.use(Stealth()); + +/** + * Main Puppeteer-based scraper that actually renders JavaScript + */ +export async function scrapeAirbnbWithPuppeteer( + url: string, + options?: { tripContext?: TripContext } +): Promise { + let browser: Browser | null = null; + + try { + // Launch browser with stealth mode + browser = await puppeteer.launch({ + headless: true, + args: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--window-size=1920,1080', + ], + }); + + const page: Page = await browser.newPage(); + + // Set realistic viewport and user agent + await page.setViewport({ width: 1920, height: 1080 }); + await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'); + + // Navigate and wait for network idle + console.log(`[Puppeteer] Navigating to ${url}`); + await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 }); + + // Wait a bit more for dynamic content + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Check if we got a 404 or challenge page + const pageTitle = await page.title(); + if (pageTitle.includes('404') || pageTitle.includes('Not Found')) { + console.error('[Puppeteer] Got 404 page'); + return null; + } + + console.log(`[Puppeteer] Page title: ${pageTitle}`); + + // Get rendered HTML + const html = await page.content(); + const $ = cheerio.load(html); + + // Extract visible text for pattern matching + const visibleText = extractVisibleText(html); + console.log(`[Puppeteer] Extracted ${visibleText.length} chars of visible text`); + + // Run all parsers + const jsonldData = parseJsonLd($); + console.log(`[Puppeteer] JSON-LD: title=${!!jsonldData.title}, images=${jsonldData.images.length}`); + + const capacityFacts = parseCapacityFacts(visibleText); + console.log(`[Puppeteer] Capacity: ${JSON.stringify(capacityFacts)}`); + + const ratingFacts = parseRating(visibleText); + const hostName = parseHost(visibleText); + const maxGuests = parseMaxGuests(visibleText); + + // Try to get sleeping arrangements from the rendered page + const sleepingOptions = await parseSleepingArrangementsFromPage(page); + console.log(`[Puppeteer] Sleeping options: ${sleepingOptions.length} found`); + + const tripContext: TripContext = { + checkIn: options?.tripContext?.checkIn || undefined, + checkOut: options?.tripContext?.checkOut || undefined, + adults: options?.tripContext?.adults || 4, + }; + + const priceData = extractPrice(html, $, tripContext); + + // Use JSON-LD price as fallback if price extraction failed + if (jsonldData.price !== null && priceData.nightly.value === null) { + priceData.nightly = { value: jsonldData.price, source: 'jsonld', confidence: 'medium' }; + priceData.status = 'EXTRACTED'; + + // Calculate total if trip context available + if (tripContext.checkIn && tripContext.checkOut) { + try { + const checkIn = new Date(tripContext.checkIn); + const checkOut = new Date(tripContext.checkOut); + const nights = Math.round((checkOut.getTime() - checkIn.getTime()) / (1000 * 60 * 60 * 24)); + if (nights > 0) { + priceData.total = { value: jsonldData.price * nights, source: 'derived', confidence: 'medium' }; + } + } catch { + // Invalid dates, skip total calculation + } + } + } + const locationData = extractLocation($, html); + const pageTitleParsed = parseTitle(html); + + // Extract images from the rendered page (more reliable) + const images = extractImagesFromPage($); + console.log(`[Puppeteer] Found ${images.length} images`); + + // Extract description from rendered page + const description = extractDescriptionFromPage($); + + // Extract amenities if not in JSON-LD + const amenities = jsonldData.amenities.length > 0 + ? jsonldData.amenities + : extractAmenitiesFromPage($); + console.log(`[Puppeteer] Found ${amenities.length} amenities`); + + // Build the result + const result: ExtractedListing = { + originalUrl: url, + normalizedUrl: url, + externalId: extractExternalId(url), + + // Title - try multiple sources + title: mergeField( + jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null, + pageTitleParsed ? field(pageTitleParsed, 'text_pattern', 'medium') : null + ), + + description: mergeField( + jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null, + description ? field(description, 'dom', 'medium') : null + ), + + // Location + locationText: locationData.locationText.value + ? field(locationData.locationText.value, locationData.locationText.source, locationData.locationText.confidence) + : field(null, 'derived', 'low'), + latitude: locationData.latitude, + longitude: locationData.longitude, + + // Pricing + tripContext, + nightlyPrice: priceData.nightly, + totalPrice: priceData.total, + priceStatus: priceData.status, + + // Rating + rating: mergeField( + ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null, + jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null + ), + reviewCount: mergeField( + ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null, + jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null + ), + + // Capacity + guestCount: mergeField( + capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null, + maxGuests ? field(maxGuests, 'text_pattern', 'medium') : null + ), + officialGuestCount: mergeField( + maxGuests ? field(maxGuests, 'text_pattern', 'high') : null, + field(null, 'derived', 'low') + ), + bedrooms: mergeField( + capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null, + field(null, 'derived', 'low') + ), + beds: mergeField( + capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null, + field(null, 'derived', 'low') + ), + bathrooms: mergeField( + capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null, + field(null, 'derived', 'low') + ), + + // Sleeping + sleepingOptions, + maxSleepingPlaces: 0, + suitableFor4: false, + extraMattressesNeededFor4: 0, + sleepingDataQuality: 'UNKNOWN', + + // Host + hostName: mergeField( + hostName ? field(hostName, 'text_pattern', 'high') : null, + jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null + ), + + // Amenities + amenities, + + // Images + images, + coverImage: images[0] || null, + + // Other + cancellationPolicy: jsonldData.cancellationPolicy + ? field(jsonldData.cancellationPolicy, 'jsonld', 'high') + : field(null, 'derived', 'low'), + + // Debug + rawSnippets: { + title: jsonldData.title || pageTitleParsed || '', + visibleText: visibleText.substring(0, 2000), + }, + extractionLog: [ + `Puppeteer render: ${url}`, + `Page title: ${pageTitle}`, + `Images found: ${images.length}`, + `Amenities found: ${amenities.length}`, + `Capacity: ${JSON.stringify(capacityFacts)}`, + ], + }; + + // Calculate sleeping stats + if (sleepingOptions.length > 0) { + const stats = calculateSleepingStats(sleepingOptions); + result.maxSleepingPlaces = stats.maxSleepingPlaces; + result.suitableFor4 = stats.suitableFor4; + result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4; + result.sleepingDataQuality = 'EXACT'; + } else if (result.beds.value && result.guestCount.value) { + const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value); + const stats = calculateSleepingStats(derivedOptions); + result.sleepingOptions = derivedOptions; + result.maxSleepingPlaces = stats.maxSleepingPlaces; + result.suitableFor4 = stats.suitableFor4; + result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4; + result.sleepingDataQuality = 'DERIVED'; + } + + return result; + + } catch (error) { + console.error('[Puppeteer] Scraper error:', error); + return null; + } finally { + if (browser) { + await browser.close(); + } + } +} + +/** + * Extract external ID from URL + */ +function extractExternalId(url: string): string | null { + const match = url.match(/\/rooms\/(\d+)/); + return match?.[1] || null; +} + +/** + * Extract images from the rendered page + */ +function extractImagesFromPage($: cheerio.CheerioAPI): string[] { + const images: string[] = []; + + // Try og:image + const ogImage = $('meta[property="og:image"]').attr('content'); + if (ogImage) images.push(ogImage); + + // Try JSON-LD images (already handled separately) + + // Try data-testid image elements + $('[data-testid*="photo"] img, [data-testid*="image"] img, [class*="photo"] img').each((_, el) => { + const src = $(el).attr('src') || $(el).attr('data-src') || $(el).attr('data-image'); + if (src && src.startsWith('http') && !images.includes(src)) { + images.push(src); + } + }); + + return images; +} + +/** + * Extract description from the rendered page + */ +function extractDescriptionFromPage($: cheerio.CheerioAPI): string | null { + // Try various selectors for description + const selectors = [ + '[data-section-id="DESCRIPTION_DEFAULT"]', + '#description', + '.description', + '[itemprop="description"]', + ]; + + for (const selector of selectors) { + const text = $(selector).text().trim(); + if (text.length > 20) { + return text.substring(0, 500); + } + } + + return null; +} + +/** + * Extract amenities from the rendered page + */ +function extractAmenitiesFromPage($: cheerio.CheerioAPI): string[] { + const amenities: string[] = []; + + $('[data-testid*="amenity"]').each((_, el) => { + const text = $(el).text().trim(); + if (text && !amenities.includes(text)) { + amenities.push(text); + } + }); + + return amenities; +} + +/** + * Map BedType to spotsPerUnit using BED_TYPE_CONFIG + */ +const BED_TYPE_SPOTS_MAP: Record = (() => { + const map: Record = {}; + for (const config of Object.values(BED_TYPE_CONFIG)) { + if (!(config.type in map)) { + map[config.type] = config.spots; + } + } + return map; +})(); + +/** + * Try to parse sleeping arrangements from Puppeteer page + * This is more reliable than text parsing + */ +async function parseSleepingArrangementsFromPage(page: Page): Promise { + const options: ExtractedListing['sleepingOptions'] = []; + + try { + // Try to find sleeping/bedroom section + const sleepingSection = await page.$('[data-section-id="SLEEPING_CONFIGURATION"]'); + + if (sleepingSection) { + const text = await sleepingSection.evaluate(el => el.textContent); + + // Parse bed types from text + const bedPatterns = [ + /(\d+)\s*(?:×|x)?\s*(queen|king|single|double|twin|full|king-size|queen-size)\s*bed/gi, + /(\d+)\s*(?:×|x)?\s*Futon/gi, + /(\d+)\s*(?:×|x)?\s*Matratze/gi, + /(\d+)\s*(?:×|x)?\s*Couch/gi, + ]; + + for (const pattern of bedPatterns) { + let match; + while ((match = pattern.exec(text)) !== null) { + const quantity = parseInt(match[1], 10); + const bedType = match[2] || 'bed'; + + // Map German/English bed types to BedType enum + let normalizedType: import('./types').BedType = 'UNKNOWN'; + let quality: 'FULL' | 'AUXILIARY' = 'AUXILIARY'; + + const lower = bedType.toLowerCase(); + if (lower.includes('queen')) { + normalizedType = 'QUEEN'; + quality = 'FULL'; + } else if (lower.includes('king')) { + normalizedType = 'KING'; + quality = 'FULL'; + } else if (lower.includes('double') || lower.includes('full')) { + normalizedType = 'DOUBLE'; + quality = 'FULL'; + } else if (lower.includes('twin') || lower.includes('single')) { + normalizedType = 'SINGLE'; + quality = 'FULL'; + } else if (lower.includes('futon')) { + normalizedType = 'FUTON'; + quality = 'AUXILIARY'; + } else if (lower.includes('matratze') || lower.includes('mattress')) { + normalizedType = 'EXTRA_MATTRESS'; + quality = 'AUXILIARY'; + } else if (lower.includes('couch') || lower.includes('sofa')) { + normalizedType = 'SOFA'; + quality = 'AUXILIARY'; + } else { + normalizedType = 'DOUBLE'; + quality = 'FULL'; + } + + options.push({ + bedType: normalizedType, + quantity, + spotsPerUnit: BED_TYPE_SPOTS_MAP[normalizedType] ?? 2, + quality, + }); + } + } + } + } catch (error) { + console.error('[Puppeteer] Error parsing sleeping arrangements:', error); + } + + return options; +} diff --git a/test-scraper-debug.ts b/test-scraper-debug.ts new file mode 100644 index 0000000..12c2bbf --- /dev/null +++ b/test-scraper-debug.ts @@ -0,0 +1,96 @@ +/** + * Debug test - captures more info about what's happening + */ + +import puppeteer from 'puppeteer-extra'; +import StealthPlugin from 'puppeteer-extra-plugin-stealth'; + +puppeteer.use(StealthPlugin()); + +const TEST_URL = 'https://www.airbnb.com/rooms/842937876795894279'; + +async function main() { + console.log('Starting debug test...\n'); + + const browser = await puppeteer.launch({ + headless: true, + args: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--window-size=1920,1080', + ], + }); + + const page = await browser.newPage(); + + await page.setViewport({ width: 1920, height: 1080 }); + await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'); + + console.log(`Navigating to: ${TEST_URL}`); + + // Track redirects + page.on('response', (response) => { + const status = response.status(); + const url = response.url(); + if (status >= 300 && status < 400) { + console.log(`🔄 Redirect: ${status} → ${response.headers()['location']?.substring(0, 100)}`); + } + }); + + try { + const response = await page.goto(TEST_URL, { + waitUntil: 'networkidle2', + timeout: 60000 + }); + + console.log(`\n📊 Response status: ${response?.status()}`); + console.log(`📊 Final URL: ${page.url()}`); + console.log(`📊 Page title: ${await page.title()}`); + + // Wait longer for dynamic content + console.log('\n⏳ Waiting 5 seconds for dynamic content...'); + await new Promise(r => setTimeout(r, 5000)); + + // Get page content + const html = await page.content(); + console.log(`\n📄 HTML length: ${html.length} chars`); + + // Check for challenge page + if (html.includes('challenge') || html.includes('captcha') || html.includes('blocked')) { + console.log('⚠️ Possible challenge/blocked page detected!'); + } + + // Check if we're on the homepage + if (page.url() === 'https://www.airbnb.com/' || page.url() === 'https://www.airbnb.com') { + console.log('⚠️ Redirected to homepage - likely blocked!'); + } + + // Extract visible text + const bodyText = await page.evaluate(() => document.body.innerText); + console.log(`\n📝 Body text length: ${bodyText.length} chars`); + console.log(`\n📝 First 500 chars of visible text:\n${bodyText.substring(0, 500)}`); + + // Check for specific listing elements + const hasListingTitle = await page.$('[data-plugin-in-point-id="TITLE_DEFAULT"]'); + const hasPhotos = await page.$('[data-section-id="PHOTO_PICKER"]'); + const hasPrice = await page.$('[data-plugin-in-point-id="PRICE_DEFAULT"]'); + + console.log(`\n🔍 Listing elements found:`); + console.log(` Title section: ${hasListingTitle ? '✅' : '❌'}`); + console.log(` Photos section: ${hasPhotos ? '✅' : '❌'}`); + console.log(` Price section: ${hasPrice ? '✅' : '❌'}`); + + // Take a screenshot + await page.screenshot({ path: 'debug-screenshot.png', fullPage: false }); + console.log(`\n📸 Screenshot saved to: debug-screenshot.png`); + + } catch (error) { + console.error('Error:', error); + } finally { + await browser.close(); + } +} + +main(); diff --git a/test-scraper.ts b/test-scraper.ts new file mode 100644 index 0000000..c70977e --- /dev/null +++ b/test-scraper.ts @@ -0,0 +1,127 @@ +/** + * Test script for Puppeteer-based Airbnb scraper + * Run with: npx tsx test-scraper.ts + */ + +import { scrapeAirbnbWithPuppeteer } from './src/lib/airbnb/puppeteer-scraper'; + +const TEST_URL = 'https://www.airbnb.com/rooms/52367822'; // Valid listing in Bad Bellingen, Germany + +async function main() { + console.log('========================================'); + console.log('Airbnb Puppeteer Scraper Test'); + console.log('========================================\n'); + + console.log(`Testing URL: ${TEST_URL}\n`); + console.log('Starting scraper (this may take 30-60 seconds)...\n'); + + const startTime = Date.now(); + + try { + const result = await scrapeAirbnbWithPuppeteer(TEST_URL); + + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(`\n✅ Scraping completed in ${elapsed}s\n`); + + if (!result) { + console.log('❌ Result is null - scraping may have failed'); + return; + } + + console.log('========================================'); + console.log('EXTRACTED DATA'); + console.log('========================================\n'); + + // Title + console.log('📌 TITLE:'); + console.log(` Value: ${result.title?.value || 'N/A'}`); + console.log(` Source: ${result.title?.source || 'N/A'}`); + console.log(` Confidence: ${result.title?.confidence || 'N/A'}\n`); + + // Price + console.log('💰 PRICE:'); + console.log(` Nightly: ${result.nightlyPrice?.value || 'N/A'} EUR`); + console.log(` Total: ${result.totalPrice?.value || 'N/A'} EUR`); + console.log(` Status: ${result.priceStatus || 'N/A'}\n`); + + // Location + console.log('📍 LOCATION:'); + console.log(` Text: ${result.locationText?.value || 'N/A'}`); + console.log(` Lat/Lng: ${result.latitude}, ${result.longitude}\n`); + + // Rating + console.log('⭐ RATING:'); + console.log(` Rating: ${result.rating?.value || 'N/A'}`); + console.log(` Reviews: ${result.reviewCount?.value || 'N/A'}\n`); + + // Capacity + console.log('🏠 CAPACITY:'); + console.log(` Guests: ${result.guestCount?.value || 'N/A'}`); + console.log(` Bedrooms: ${result.bedrooms?.value || 'N/A'}`); + console.log(` Beds: ${result.beds?.value || 'N/A'}`); + console.log(` Bathrooms: ${result.bathrooms?.value || 'N/A'}\n`); + + // Sleeping Options + console.log('🛏️ SLEEPING OPTIONS:'); + if (result.sleepingOptions && result.sleepingOptions.length > 0) { + result.sleepingOptions.forEach((opt, i) => { + console.log(` ${i + 1}. ${opt.quantity}x ${opt.bedType} (${opt.spotsPerUnit} spots, ${opt.quality})`); + }); + console.log(` Max sleeping places: ${result.maxSleepingPlaces}`); + console.log(` Suitable for 4: ${result.suitableFor4 ? '✅ Yes' : '❌ No'}`); + console.log(` Quality: ${result.sleepingDataQuality}`); + } else { + console.log(' No sleeping options extracted'); + } + console.log(''); + + // Host + console.log('👤 HOST:'); + console.log(` Name: ${result.hostName?.value || 'N/A'}\n`); + + // Images + console.log('🖼️ IMAGES:'); + console.log(` Count: ${result.images?.length || 0}`); + if (result.images && result.images.length > 0) { + console.log(` First 3:`); + result.images.slice(0, 3).forEach((img, i) => { + console.log(` ${i + 1}. ${img.substring(0, 80)}...`); + }); + } + console.log(''); + + // Amenities + console.log('✨ AMENITIES:'); + console.log(` Count: ${result.amenities?.length || 0}`); + if (result.amenities && result.amenities.length > 0) { + console.log(` First 10: ${result.amenities.slice(0, 10).join(', ')}`); + } + console.log(''); + + // Description + console.log('📝 DESCRIPTION:'); + const desc = result.description?.value || 'N/A'; + console.log(` ${desc.substring(0, 200)}${desc.length > 200 ? '...' : ''}\n`); + + // External ID + console.log('🔗 EXTERNAL ID:'); + console.log(` ${result.externalId || 'N/A'}\n`); + + // Extraction Log + console.log('📋 EXTRACTION LOG:'); + result.extractionLog?.forEach(log => { + console.log(` - ${log}`); + }); + + console.log('\n========================================'); + console.log('TEST COMPLETE'); + console.log('========================================'); + + } catch (error) { + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(`\n❌ Error after ${elapsed}s:`); + console.error(error); + } +} + +main().catch(console.error);