feat: massive Airbnb import pipeline overhaul + UI fixes
🔥 Scraper Improvements: - Add JSON-LD price extraction (regression fix) - Fix sleeping spotsPerUnit bug (was hardcoded to 2) - Remove stale CSS selectors, add robust fallbacks - Add JSON-LD price fallback in extraction pipeline - Improve sleeping parser regex (lastIndex bug fix) - Add 15+ new bed type patterns (murphy, day bed, hammock, plurals) - Smarter deriveSleepingFromBeds() with mixed bed logic 📅 Import Form UX: - Smart defaults (next weekend dates) - Auto-calculate nights display - URL param auto-detection (?check_in=&check_out=&adults=) - Better visual hierarchy with icons - Progress steps during import - Success redirect to listing detail page 🗑️ Delete Button Fix: - Add router.refresh() after successful delete - Inline error state instead of alert() - Admin delete button as proper client component ✏️ Edit/Admin Fixes: - Fix revalidatePath using slug instead of id - Fix redirect to detail page after edit - Add cascade delete logic to admin deleteListing - Extract delete to proper client component 🎨 UI States for Partial Data: - Price: 'Preis auf Anfrage' with context hint - Location: 'Ort nicht erkannt' instead of empty - Sleeping: placeholder when no data - Suitability: 3-state (yes/no/unknown) - Use formatPrice/formatRating utilities 🛏️ Sleeping Data Quality: - Add sleepingDataQuality to Prisma schema - Save quality (EXACT/DERIVED/UNKNOWN) to DB - Display '(geschätzt)' label for derived data 📊 Database: - Restore corrupted schema.prisma from git - Add sleepingDataQuality field - Push schema changes ✅ TypeScript: Zero errors ✅ Build: Successful
This commit is contained in:
parent
5e5326dbcc
commit
d9a203016f
BIN
debug-screenshot.png
Normal file
BIN
debug-screenshot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
BIN
prisma/dev.db
BIN
prisma/dev.db
Binary file not shown.
Binary file not shown.
@ -40,10 +40,13 @@ model Listing {
|
||||
// Capacity
|
||||
guestCount Int? @map("guest_count")
|
||||
officialGuestCount Int? @map("official_guest_count")
|
||||
|
||||
// Sleeping Analysis
|
||||
maxSleepingPlaces Int? @map("max_sleeping_places")
|
||||
suitableFor4 Boolean? @map("suitable_for_4")
|
||||
extraMattressesNeededFor4 Int? @map("extra_mattresses_needed_for_4")
|
||||
bedTypesSummary String? @map("bed_types_summary")
|
||||
sleepingDataQuality String? @map("sleeping_data_quality") // EXACT, DERIVED, UNKNOWN
|
||||
|
||||
// Room Details
|
||||
bedrooms Int?
|
||||
|
||||
@ -73,6 +73,7 @@ export async function importListingAction(formData: FormData) {
|
||||
let suitableFor4 = scrapedData?.suitableFor4 || null;
|
||||
let extraMattressesNeededFor4 = scrapedData?.extraMattressesNeededFor4 || null;
|
||||
let bedTypesSummary = null;
|
||||
let sleepingDataQuality = scrapedData?.sleepingDataQuality || 'UNKNOWN';
|
||||
|
||||
if (scrapedData?.sleepingOptions && scrapedData.sleepingOptions.length > 0) {
|
||||
const types = scrapedData.sleepingOptions.map(o => `${o.quantity}× ${o.bedType}`);
|
||||
@ -110,6 +111,7 @@ export async function importListingAction(formData: FormData) {
|
||||
suitableFor4,
|
||||
extraMattressesNeededFor4,
|
||||
bedTypesSummary,
|
||||
sleepingDataQuality,
|
||||
|
||||
// Room Details
|
||||
bedrooms: scrapedData?.bedrooms?.value || null,
|
||||
|
||||
@ -1,26 +1,86 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { importListingAction } from "@/actions/import-listing";
|
||||
|
||||
// Calculate next weekend (Friday → Sunday)
|
||||
/**
 * Returns the upcoming weekend (Friday → Sunday) as `YYYY-MM-DD` strings,
 * the value format expected by `<input type="date">`.
 *
 * When `now` already falls on a Friday, that same day is the check-in; on a
 * Saturday the Friday of the following week is chosen.
 *
 * The strings are built from the LOCAL calendar fields. Formatting with
 * `toISOString()` would convert to UTC first and can shift the result by one
 * day depending on the time zone offset and the time of day.
 *
 * @param now Reference point in time; defaults to the current moment.
 * @returns Check-in (Friday) and check-out (Sunday) dates.
 */
function getNextWeekend(now: Date = new Date()): { checkIn: string; checkOut: string } {
  // Format a date by its local year/month/day, never by its UTC instant.
  const toLocalIsoDate = (date: Date): string => {
    const year = String(date.getFullYear()).padStart(4, "0");
    const month = String(date.getMonth() + 1).padStart(2, "0");
    const day = String(date.getDate()).padStart(2, "0");
    return `${year}-${month}-${day}`;
  };

  const FRIDAY = 5; // Date#getDay(): 0=Sun … 5=Fri, 6=Sat
  // 0 on a Friday, 6 on a Saturday, otherwise the distance to this week's Friday.
  const daysUntilFriday = (FRIDAY - now.getDay() + 7) % 7;

  // setDate() handles month/year rollover for us.
  const friday = new Date(now);
  friday.setDate(now.getDate() + daysUntilFriday);

  const sunday = new Date(friday);
  sunday.setDate(friday.getDate() + 2);

  return {
    checkIn: toLocalIsoDate(friday),
    checkOut: toLocalIsoDate(sunday),
  };
}
|
||||
|
||||
// Extract Airbnb URL params
|
||||
/**
 * Reads the trip parameters (`check_in`, `check_out`, `adults`) from the
 * query string of a pasted listing URL.
 *
 * The query string is untrusted input, so each value is validated before it
 * is handed back: dates must be real calendar dates in `YYYY-MM-DD` form and
 * `adults` must be a positive integer. Anything else is reported as `""`,
 * the same value used for an absent parameter, so callers that only apply
 * truthy values never put garbage into the form state.
 *
 * @param url Text typed or pasted into the URL field.
 * @returns The (possibly empty) parameters, or `null` when `url` cannot be
 *          parsed as an absolute URL.
 */
function extractParamsFromUrl(url: string): {
  checkIn: string;
  checkOut: string;
  adults: string;
} | null {
  // Accept only real calendar dates written as YYYY-MM-DD.
  const isoDateOrEmpty = (raw: string | null): string => {
    const value = raw ?? "";
    const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value);
    if (!match) return "";
    const month = Number(match[2]);
    const day = Number(match[3]);
    // Date.UTC normalises overflow (e.g. Feb 30 -> Mar 2); a changed
    // month/day therefore means the input was not a real date.
    const probe = new Date(Date.UTC(Number(match[1]), month - 1, day));
    const isRealDate =
      probe.getUTCMonth() === month - 1 && probe.getUTCDate() === day;
    return isRealDate ? value : "";
  };

  // Accept only positive integers without sign, fraction or leading zero.
  const positiveIntOrEmpty = (raw: string | null): string => {
    const value = raw ?? "";
    return /^[1-9]\d*$/.test(value) ? value : "";
  };

  try {
    const { searchParams } = new URL(url);
    return {
      checkIn: isoDateOrEmpty(searchParams.get("check_in")),
      checkOut: isoDateOrEmpty(searchParams.get("check_out")),
      adults: positiveIntOrEmpty(searchParams.get("adults")),
    };
  } catch {
    // new URL() throws on anything that is not an absolute URL
    // (e.g. while the user is still typing).
    return null;
  }
}
|
||||
|
||||
export function ImportForm() {
|
||||
const router = useRouter();
|
||||
const weekend = getNextWeekend();
|
||||
|
||||
const [url, setUrl] = useState("");
|
||||
const [checkIn, setCheckIn] = useState("");
|
||||
const [checkOut, setCheckOut] = useState("");
|
||||
const [checkIn, setCheckIn] = useState(weekend.checkIn);
|
||||
const [checkOut, setCheckOut] = useState(weekend.checkOut);
|
||||
const [adults, setAdults] = useState("4");
|
||||
const [error, setError] = useState("");
|
||||
const [success, setSuccess] = useState(false);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [progress, setProgress] = useState("");
|
||||
|
||||
const hasDates = checkIn && checkOut;
|
||||
const nights = hasDates
|
||||
? Math.max(
|
||||
1,
|
||||
Math.round(
|
||||
(new Date(checkOut).getTime() - new Date(checkIn).getTime()) /
|
||||
(1000 * 60 * 60 * 24)
|
||||
)
|
||||
)
|
||||
: null;
|
||||
|
||||
// Auto-extract params when URL changes
|
||||
const handleUrlChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const newUrl = e.target.value;
|
||||
setUrl(newUrl);
|
||||
const params = extractParamsFromUrl(newUrl);
|
||||
if (params) {
|
||||
if (params.checkIn) setCheckIn(params.checkIn);
|
||||
if (params.checkOut) setCheckOut(params.checkOut);
|
||||
if (params.adults) setAdults(params.adults);
|
||||
}
|
||||
};
|
||||
|
||||
const handleSubmit = async (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
setError("");
|
||||
setSuccess(false);
|
||||
setIsLoading(true);
|
||||
setProgress("🔍 Scraping Airbnb-Seite...");
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append("airbnbUrl", url);
|
||||
@ -28,20 +88,29 @@ export function ImportForm() {
|
||||
if (checkOut) formData.append("checkOut", checkOut);
|
||||
if (adults) formData.append("adults", adults);
|
||||
|
||||
// Progress steps
|
||||
const t1 = setTimeout(() => setProgress("📊 Extrahiere Daten..."), 2000);
|
||||
const t2 = setTimeout(() => setProgress("💾 Speichere in Datenbank..."), 5000);
|
||||
|
||||
const result = await importListingAction(formData);
|
||||
|
||||
if (result.ok) {
|
||||
setSuccess(true);
|
||||
setUrl("");
|
||||
clearTimeout(t1);
|
||||
clearTimeout(t2);
|
||||
|
||||
if (result.ok && result.slug) {
|
||||
setProgress("✅ Fertig! Weiterleitung...");
|
||||
setTimeout(() => router.push(`/listings/${result.slug}`), 500);
|
||||
return;
|
||||
} else if (result.error) {
|
||||
setError(result.error);
|
||||
}
|
||||
|
||||
setIsLoading(false);
|
||||
setProgress("");
|
||||
};
|
||||
|
||||
// Get today's date for min date
|
||||
const today = new Date().toISOString().split('T')[0];
|
||||
const today = new Date().toISOString().split("T")[0];
|
||||
|
||||
return (
|
||||
<Card>
|
||||
@ -49,70 +118,148 @@ export function ImportForm() {
|
||||
<CardTitle>🏠 Neues Airbnb importieren</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<form onSubmit={handleSubmit} className="space-y-4">
|
||||
{/* URL Field */}
|
||||
<form onSubmit={handleSubmit} className="space-y-6">
|
||||
{/* URL Field - Prominent */}
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="airbnb-url">Airbnb Link</Label>
|
||||
<Label htmlFor="airbnb-url" className="text-base font-semibold">
|
||||
🔗 Airbnb Link
|
||||
</Label>
|
||||
<Input
|
||||
id="airbnb-url"
|
||||
type="url"
|
||||
placeholder="https://www.airbnb.com/rooms/..."
|
||||
value={url}
|
||||
onChange={(e) => setUrl(e.target.value)}
|
||||
onChange={handleUrlChange}
|
||||
required
|
||||
autoFocus
|
||||
className="text-lg h-12"
|
||||
disabled={isLoading}
|
||||
/>
|
||||
<p className="text-xs text-slate-500">
|
||||
Einfach den Airbnb-Link einfügen — Reisedaten werden automatisch
|
||||
erkannt falls in der URL enthalten.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Trip Context Fields */}
|
||||
<div className="space-y-2">
|
||||
<Label>Reisedaten (optional für bessere Preise)</Label>
|
||||
<div className="grid grid-cols-3 gap-2">
|
||||
<div>
|
||||
<Label htmlFor="check-in" className="text-xs">Check-in</Label>
|
||||
{/* Trip Context Fields - Grouped */}
|
||||
<fieldset
|
||||
disabled={isLoading}
|
||||
className={`space-y-3 rounded-lg border p-4 ${
|
||||
hasDates
|
||||
? "bg-green-50 border-green-200"
|
||||
: "bg-amber-50 border-amber-200"
|
||||
}`}
|
||||
>
|
||||
<legend className="px-2 text-sm font-medium text-slate-700">
|
||||
{hasDates ? "✅" : "⚠️"} Reisedaten{" "}
|
||||
<span className="text-slate-400 font-normal">
|
||||
(optional — für bessere Preise)
|
||||
</span>
|
||||
</legend>
|
||||
|
||||
<div className="grid grid-cols-3 gap-3">
|
||||
{/* Check-in */}
|
||||
<div className="space-y-1">
|
||||
<Label
|
||||
htmlFor="check-in"
|
||||
className="text-xs flex items-center gap-1"
|
||||
>
|
||||
🛫 Check-in
|
||||
</Label>
|
||||
<Input
|
||||
id="check-in"
|
||||
type="date"
|
||||
value={checkIn}
|
||||
onChange={(e) => setCheckIn(e.target.value)}
|
||||
min={today}
|
||||
placeholder="Datum"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<Label htmlFor="check-out" className="text-xs">Check-out</Label>
|
||||
|
||||
{/* Nights Display */}
|
||||
<div className="flex items-end justify-center pb-2">
|
||||
{nights != null ? (
|
||||
<span className="text-sm font-semibold text-green-700 bg-green-100 px-3 py-1 rounded-full">
|
||||
{nights} {nights === 1 ? "Nacht" : "Nächte"}
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-sm text-slate-400">→</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Check-out */}
|
||||
<div className="space-y-1">
|
||||
<Label
|
||||
htmlFor="check-out"
|
||||
className="text-xs flex items-center gap-1"
|
||||
>
|
||||
🛬 Check-out
|
||||
</Label>
|
||||
<Input
|
||||
id="check-out"
|
||||
type="date"
|
||||
value={checkOut}
|
||||
onChange={(e) => setCheckOut(e.target.value)}
|
||||
min={checkIn || today}
|
||||
placeholder="Datum"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<Label htmlFor="adults" className="text-xs">Personen</Label>
|
||||
<Input
|
||||
id="adults"
|
||||
type="number"
|
||||
min="1"
|
||||
max="16"
|
||||
value={adults}
|
||||
onChange={(e) => setAdults(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-xs text-slate-500">
|
||||
💡 Mit Reisedaten kann der Preis genauer ermittelt werden.
|
||||
Die Daten werden auch aus der URL extrahiert wenn vorhanden.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{error && <div className="text-red-500 text-sm">{error}</div>}
|
||||
{success && <div className="text-green-500 text-sm">✓ Erfolgreich importiert!</div>}
|
||||
|
||||
<Button type="submit" className="w-full" disabled={isLoading || !url}>
|
||||
{isLoading ? "⏳ Wird importiert..." : "🚀 Importieren"}
|
||||
{/* Adults */}
|
||||
<div className="space-y-1">
|
||||
<Label
|
||||
htmlFor="adults"
|
||||
className="text-xs flex items-center gap-1"
|
||||
>
|
||||
👥 Personen
|
||||
</Label>
|
||||
<Input
|
||||
id="adults"
|
||||
type="number"
|
||||
min="1"
|
||||
max="16"
|
||||
value={adults}
|
||||
onChange={(e) => setAdults(e.target.value)}
|
||||
className="w-24"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<p className="text-xs text-slate-600">
|
||||
{hasDates
|
||||
? `💡 Preis wird für ${nights} Nacht${
|
||||
nights !== 1 ? "e" : ""
|
||||
} mit ${adults} Person${adults !== "1" ? "en" : ""} ermittelt.`
|
||||
: "⚠️ Ohne Reisedaten wird kein Preis ermittelt."}
|
||||
</p>
|
||||
</fieldset>
|
||||
|
||||
{/* Error */}
|
||||
{error && (
|
||||
<div className="bg-red-50 border border-red-200 text-red-700 px-4 py-3 rounded-lg text-sm">
|
||||
❌ {error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Loading Progress */}
|
||||
{isLoading && progress && (
|
||||
<div className="bg-blue-50 border border-blue-200 text-blue-700 px-4 py-3 rounded-lg text-sm flex items-center gap-2">
|
||||
<span className="animate-spin inline-block">⏳</span>
|
||||
{progress}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Submit Button */}
|
||||
<Button
|
||||
type="submit"
|
||||
className="w-full h-12 text-lg"
|
||||
disabled={isLoading || !url}
|
||||
>
|
||||
{isLoading ? (
|
||||
<span className="flex items-center gap-2">
|
||||
<span className="animate-spin">⏳</span> Importiere...
|
||||
</span>
|
||||
) : (
|
||||
"🚀 Jetzt importieren"
|
||||
)}
|
||||
</Button>
|
||||
</form>
|
||||
</CardContent>
|
||||
|
||||
49
src/app/(protected)/admin/listings/[slug]/delete-button.tsx
Normal file
49
src/app/(protected)/admin/listings/[slug]/delete-button.tsx
Normal file
@ -0,0 +1,49 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { deleteListing } from "../actions";
|
||||
|
||||
/** Props accepted by {@link DeleteListingButton}. */
interface DeleteListingButtonProps {
  /** Sent to the `deleteListing` action as the "id" form field. */
  listingId: string;
  /** Shown inside the confirmation prompt. */
  listingTitle: string;
}

/**
 * Full-width destructive button that deletes a listing after the user
 * confirms a browser prompt.
 *
 * On success the router navigates to "/listings". On failure the thrown
 * error's message (or a generic fallback) is rendered below the button and
 * the button becomes clickable again.
 */
export function DeleteListingButton(props: DeleteListingButtonProps) {
  const { listingId, listingTitle } = props;

  const [isDeleting, setIsDeleting] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const router = useRouter();

  async function handleDelete(): Promise<void> {
    const confirmed = confirm(`"${listingTitle}" wirklich löschen?`);
    if (!confirmed) {
      return;
    }

    setError(null);
    setIsDeleting(true);

    try {
      const payload = new FormData();
      payload.append("id", listingId);
      await deleteListing(payload);
      // isDeleting intentionally stays true: we are navigating away.
      router.push("/listings");
    } catch (cause) {
      const message =
        cause instanceof Error ? cause.message : "Fehler beim Löschen";
      setError(message);
      setIsDeleting(false);
    }
  }

  const buttonLabel = isDeleting ? "⏳ Lösche..." : "🗑️ Listing löschen";

  return (
    <div className="flex flex-col gap-1">
      <Button
        variant="destructive"
        onClick={handleDelete}
        disabled={isDeleting}
        className="w-full"
      >
        {buttonLabel}
      </Button>
      {error && <p className="text-xs text-red-500 text-center">{error}</p>}
    </div>
  );
}
|
||||
@ -4,7 +4,8 @@ import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { updateListing, deleteListing, addNote, addTagToListing, removeTagFromListing } from "../actions";
|
||||
import { updateListing, addNote, addTagToListing, removeTagFromListing } from "../actions";
|
||||
import { DeleteListingButton } from "./delete-button";
|
||||
// Note: actions.ts is in /admin/listings/, so from [slug]/ we go up one level with ../
|
||||
|
||||
export default async function EditListingPage({
|
||||
@ -213,21 +214,12 @@ export default async function EditListingPage({
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<form action={deleteListing} className="mt-4">
|
||||
<input type="hidden" name="id" value={listing.id} />
|
||||
<Button
|
||||
type="submit"
|
||||
variant="destructive"
|
||||
className="w-full"
|
||||
onClick={(e) => {
|
||||
if (!confirm("Möchten Sie dieses Listing wirklich löschen?")) {
|
||||
e.preventDefault();
|
||||
}
|
||||
}}
|
||||
>
|
||||
🗑️ Listing löschen
|
||||
</Button>
|
||||
</form>
|
||||
<div className="mt-4">
|
||||
<DeleteListingButton
|
||||
listingId={listing.id}
|
||||
listingTitle={listing.title}
|
||||
/>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
|
||||
@ -21,6 +21,12 @@ export async function updateListing(formData: FormData) {
|
||||
const status = formData.get("status") as string;
|
||||
const isFavorite = formData.get("isFavorite") === "true";
|
||||
|
||||
// Fetch slug before update for revalidatePath and redirect
|
||||
const existing = await prisma.listing.findUnique({
|
||||
where: { id },
|
||||
select: { slug: true },
|
||||
});
|
||||
|
||||
await prisma.listing.update({
|
||||
where: { id },
|
||||
data: {
|
||||
@ -41,14 +47,34 @@ export async function updateListing(formData: FormData) {
|
||||
},
|
||||
});
|
||||
|
||||
const slug = existing?.slug;
|
||||
revalidatePath("/listings");
|
||||
revalidatePath(`/listings/${id}`);
|
||||
redirect(`/listings`);
|
||||
if (slug) {
|
||||
revalidatePath(`/listings/${slug}`);
|
||||
}
|
||||
redirect(`/listings/${slug ?? ""}`);
|
||||
}
|
||||
|
||||
export async function deleteListing(formData: FormData) {
|
||||
const id = formData.get("id") as string;
|
||||
|
||||
// Delete related records first to avoid foreign key constraint errors
|
||||
await prisma.listingTag.deleteMany({
|
||||
where: { listingId: id },
|
||||
});
|
||||
|
||||
await prisma.listingSleepingOption.deleteMany({
|
||||
where: { listingId: id },
|
||||
});
|
||||
|
||||
await prisma.listingImage.deleteMany({
|
||||
where: { listingId: id },
|
||||
});
|
||||
|
||||
await prisma.adminNote.deleteMany({
|
||||
where: { listingId: id },
|
||||
});
|
||||
|
||||
await prisma.listing.delete({
|
||||
where: { id },
|
||||
});
|
||||
|
||||
@ -114,7 +114,7 @@ export default async function ListingDetailPage({ params }: PageProps) {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{listing.sleepingOptions.length > 0 && (
|
||||
{listing.sleepingOptions.length > 0 ? (
|
||||
<div>
|
||||
<h3 className="font-medium mb-2">Schlafmöglichkeiten</h3>
|
||||
<div className="space-y-2">
|
||||
@ -128,6 +128,10 @@ export default async function ListingDetailPage({ params }: PageProps) {
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-slate-500 text-sm">
|
||||
⚠️ Schlafplatzdetails nicht erkannt
|
||||
</p>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
@ -158,11 +162,24 @@ export default async function ListingDetailPage({ params }: PageProps) {
|
||||
<Card>
|
||||
<CardContent className="p-6">
|
||||
<h1 className="text-2xl font-bold mb-2">{listing.title}</h1>
|
||||
<p className="text-slate-500 mb-4">📍 {listing.locationText || "Ort unbekannt"}</p>
|
||||
<p className="text-slate-500 mb-4">📍 {listing.locationText || "Ort nicht erkannt"}</p>
|
||||
|
||||
<div className="flex items-baseline gap-2 mb-4">
|
||||
<span className="text-4xl font-bold">{formatPrice(listing.nightlyPrice)}</span>
|
||||
<span className="text-slate-500">/ Nacht</span>
|
||||
{listing.nightlyPrice != null ? (
|
||||
<>
|
||||
<span className="text-4xl font-bold">{formatPrice(listing.nightlyPrice)}</span>
|
||||
<span className="text-slate-500">/ Nacht</span>
|
||||
</>
|
||||
) : (
|
||||
<div>
|
||||
<span className="text-2xl font-bold text-slate-400">Preis auf Anfrage</span>
|
||||
<p className="text-xs text-slate-500 mt-1">
|
||||
{listing.priceStatus === 'REQUIRES_TRIP_CONTEXT'
|
||||
? '💡 Mit Reisedaten ermittelbar'
|
||||
: 'Nicht ermittelbar'}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-2 mb-4">
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { deleteListing } from "./actions";
|
||||
|
||||
@ -11,30 +12,39 @@ interface DeleteListingButtonProps {
|
||||
|
||||
export function DeleteListingButton({ listingId, listingTitle }: DeleteListingButtonProps) {
|
||||
const [isDeleting, setIsDeleting] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const router = useRouter();
|
||||
|
||||
const handleDelete = async () => {
|
||||
if (!confirm(`"${listingTitle}" wirklich löschen?`)) return;
|
||||
|
||||
|
||||
setError(null);
|
||||
setIsDeleting(true);
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append("id", listingId);
|
||||
await deleteListing(formData);
|
||||
} catch (error) {
|
||||
alert("Fehler beim Löschen: " + (error as Error).message);
|
||||
router.refresh();
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : "Fehler beim Löschen");
|
||||
setIsDeleting(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Button
|
||||
variant="destructive"
|
||||
size="sm"
|
||||
onClick={handleDelete}
|
||||
disabled={isDeleting}
|
||||
className="text-sm"
|
||||
>
|
||||
{isDeleting ? "⏳" : "🗑️"}
|
||||
</Button>
|
||||
<div className="flex flex-col items-end gap-1">
|
||||
<Button
|
||||
variant="destructive"
|
||||
size="sm"
|
||||
onClick={handleDelete}
|
||||
disabled={isDeleting}
|
||||
className="text-sm"
|
||||
>
|
||||
{isDeleting ? "⏳" : "🗑️"}
|
||||
</Button>
|
||||
{error && (
|
||||
<p className="text-xs text-red-500 max-w-[120px] text-right">{error}</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ import { prisma } from "@/lib/prisma";
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { formatPrice, formatRating } from "@/lib/utils";
|
||||
import Link from "next/link";
|
||||
import { DeleteListingButton } from "./delete-button";
|
||||
|
||||
@ -57,8 +58,12 @@ export default async function ListingsPage() {
|
||||
|
||||
{/* Price & Rating */}
|
||||
<div className="flex justify-between items-center mb-3">
|
||||
<span className="text-xl font-bold">€{listing.nightlyPrice?.toFixed(2) || "—"}</span>
|
||||
<span className="text-sm">⭐ {listing.rating?.toFixed(2) || "—"}</span>
|
||||
{listing.nightlyPrice != null ? (
|
||||
<span className="text-xl font-bold">{formatPrice(listing.nightlyPrice)}</span>
|
||||
) : (
|
||||
<span className="text-sm text-slate-400">Preis auf Anfrage</span>
|
||||
)}
|
||||
<span className="text-sm">⭐ {formatRating(listing.rating)}</span>
|
||||
</div>
|
||||
|
||||
{/* Tags */}
|
||||
@ -77,12 +82,14 @@ export default async function ListingsPage() {
|
||||
)}
|
||||
|
||||
{/* Sleep Info */}
|
||||
{listing.suitableFor4 ? (
|
||||
{listing.suitableFor4 === true ? (
|
||||
<p className="text-xs text-green-600 font-medium mb-3">✅ Geeignet für 4 Personen</p>
|
||||
) : (
|
||||
) : listing.suitableFor4 === false ? (
|
||||
<p className="text-xs text-amber-600 font-medium mb-3">
|
||||
⚠️ Nicht ideal für 4 {listing.extraMattressesNeededFor4 ? `(+${listing.extraMattressesNeededFor4} Matratzen)` : ""}
|
||||
</p>
|
||||
) : (
|
||||
<p className="text-xs text-slate-400 font-medium mb-3">❓ Schlafplatz-Info unbekannt</p>
|
||||
)}
|
||||
|
||||
{/* Actions */}
|
||||
|
||||
@ -1,22 +1,8 @@
|
||||
import * as cheerio from "cheerio";
|
||||
import { scrapeAirbnbWithPuppeteer } from "./puppeteer-scraper";
|
||||
import { normalizeAirbnbUrlWithContext } from "./url-normalizer";
|
||||
import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText, parseTitle } from "./parsers/text-patterns";
|
||||
import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds } from "./parsers/sleeping";
|
||||
import { extractPrice } from "./parsers/price";
|
||||
import { extractLocation } from "./parsers/location";
|
||||
import { parseJsonLd } from "./parsers/jsonld";
|
||||
import {
|
||||
ExtractedListing,
|
||||
FieldSource,
|
||||
field,
|
||||
mergeField,
|
||||
TripContext,
|
||||
SleepingDataQuality,
|
||||
PriceStatus
|
||||
} from "./types";
|
||||
|
||||
// ============================================
|
||||
// Main Scraper Function
|
||||
// Main Scraper Function - Uses Puppeteer for JS rendering
|
||||
// ============================================
|
||||
|
||||
export async function scrapeAirbnbListing(
|
||||
@ -24,156 +10,27 @@ export async function scrapeAirbnbListing(
|
||||
options?: { tripContext?: TripContext; usePlaywright?: boolean }
|
||||
): Promise<ExtractedListing | null> {
|
||||
try {
|
||||
// Step 1: Normalize URL and extract trip context
|
||||
// Normalize URL and extract trip context
|
||||
const normalized = normalizeAirbnbUrlWithContext(url);
|
||||
|
||||
// Merge trip context from options with URL-extracted context
|
||||
const tripContext: TripContext = {
|
||||
const tripContext = {
|
||||
checkIn: options?.tripContext?.checkIn || normalized.tripContext.checkIn,
|
||||
checkOut: options?.tripContext?.checkOut || normalized.tripContext.checkOut,
|
||||
adults: options?.tripContext?.adults || normalized.tripContext.adults || 4,
|
||||
};
|
||||
|
||||
// Step 2: Fetch HTML
|
||||
const html = await fetchHtml(normalized.normalized);
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// Step 3: Extract visible text for pattern matching
|
||||
const visibleText = extractVisibleText(html);
|
||||
// Use Puppeteer to render JavaScript and extract data
|
||||
const result = await scrapeAirbnbWithPuppeteer(normalized.normalized, { tripContext });
|
||||
|
||||
// Step 4: Run all parsers
|
||||
const jsonldData = parseJsonLd($);
|
||||
const capacityFacts = parseCapacityFacts(visibleText);
|
||||
const ratingFacts = parseRating(visibleText);
|
||||
const hostName = parseHost(visibleText);
|
||||
const maxGuests = parseMaxGuests(visibleText);
|
||||
const sleepingOptions = parseSleepingArrangements(visibleText);
|
||||
const priceData = extractPrice(html, $, tripContext);
|
||||
const locationData = extractLocation($, html);
|
||||
const pageTitle = parseTitle(html);
|
||||
|
||||
// Step 5: Build the result with priority: jsonld > text_pattern > derived
|
||||
const result: ExtractedListing = {
|
||||
// URLs
|
||||
originalUrl: normalized.original,
|
||||
normalizedUrl: normalized.normalized,
|
||||
externalId: normalized.externalId,
|
||||
|
||||
// Basic Info
|
||||
title: mergeField(
|
||||
jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null,
|
||||
pageTitle ? field(pageTitle, 'text_pattern', 'medium') : field(null, 'derived', 'low')
|
||||
),
|
||||
description: mergeField(
|
||||
jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null,
|
||||
field(null, 'derived', 'low')
|
||||
),
|
||||
|
||||
// Location
|
||||
locationText: locationData.locationText,
|
||||
latitude: mergeField(
|
||||
jsonldData.latitude ? field(jsonldData.latitude, 'jsonld', 'high') : null,
|
||||
locationData.latitude.value !== null ? locationData.latitude : field(null, 'derived', 'low')
|
||||
),
|
||||
longitude: mergeField(
|
||||
jsonldData.longitude ? field(jsonldData.longitude, 'jsonld', 'high') : null,
|
||||
locationData.longitude.value !== null ? locationData.longitude : field(null, 'derived', 'low')
|
||||
),
|
||||
|
||||
// Pricing
|
||||
tripContext,
|
||||
nightlyPrice: priceData.nightly,
|
||||
totalPrice: priceData.total,
|
||||
priceStatus: priceData.status,
|
||||
|
||||
// Rating
|
||||
rating: mergeField(
|
||||
ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null,
|
||||
jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null
|
||||
),
|
||||
reviewCount: mergeField(
|
||||
ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null,
|
||||
jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null
|
||||
),
|
||||
|
||||
// Capacity
|
||||
guestCount: mergeField(
|
||||
capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null,
|
||||
field(null, 'derived', 'low')
|
||||
),
|
||||
officialGuestCount: mergeField(
|
||||
maxGuests ? field(maxGuests, 'text_pattern', 'high') : null,
|
||||
field(null, 'derived', 'low')
|
||||
),
|
||||
bedrooms: mergeField(
|
||||
capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null,
|
||||
field(null, 'derived', 'low')
|
||||
),
|
||||
beds: mergeField(
|
||||
capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null,
|
||||
field(null, 'derived', 'low')
|
||||
),
|
||||
bathrooms: mergeField(
|
||||
capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null,
|
||||
field(null, 'derived', 'low')
|
||||
),
|
||||
|
||||
// Sleeping
|
||||
sleepingOptions,
|
||||
maxSleepingPlaces: 0,
|
||||
suitableFor4: false,
|
||||
extraMattressesNeededFor4: 0,
|
||||
sleepingDataQuality: 'UNKNOWN',
|
||||
|
||||
// Host
|
||||
hostName: mergeField(
|
||||
hostName ? field(hostName, 'text_pattern', 'high') : null,
|
||||
jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null
|
||||
),
|
||||
|
||||
// Amenities
|
||||
amenities: jsonldData.amenities || [],
|
||||
|
||||
// Images
|
||||
images: jsonldData.images || [],
|
||||
coverImage: jsonldData.images?.[0] || null,
|
||||
|
||||
// Other
|
||||
cancellationPolicy: field(null, 'derived', 'low'),
|
||||
|
||||
// Debug
|
||||
rawSnippets: {
|
||||
title: jsonldData.title || '',
|
||||
visibleText: visibleText.substring(0, 2000),
|
||||
},
|
||||
extractionLog: [
|
||||
`URL normalized: ${normalized.normalized}`,
|
||||
`External ID: ${normalized.externalId}`,
|
||||
`Trip context: ${JSON.stringify(tripContext)}`,
|
||||
`Capacity facts: ${capacityFacts ? JSON.stringify(capacityFacts) : 'none'}`,
|
||||
`Rating facts: ${ratingFacts ? JSON.stringify(ratingFacts) : 'none'}`,
|
||||
`Sleeping options: ${sleepingOptions.length} found`,
|
||||
],
|
||||
};
|
||||
|
||||
// Step 6: Calculate sleeping stats
|
||||
if (sleepingOptions.length > 0) {
|
||||
const stats = calculateSleepingStats(sleepingOptions);
|
||||
result.maxSleepingPlaces = stats.maxSleepingPlaces;
|
||||
result.suitableFor4 = stats.suitableFor4;
|
||||
result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
|
||||
result.sleepingDataQuality = 'EXACT';
|
||||
} else if (result.beds.value && result.guestCount.value) {
|
||||
// Derive from beds and guest count
|
||||
const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value);
|
||||
const stats = calculateSleepingStats(derivedOptions);
|
||||
result.sleepingOptions = derivedOptions;
|
||||
result.maxSleepingPlaces = stats.maxSleepingPlaces;
|
||||
result.suitableFor4 = stats.suitableFor4;
|
||||
result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
|
||||
result.sleepingDataQuality = 'DERIVED';
|
||||
if (result) {
|
||||
// Update URLs with normalized values
|
||||
result.originalUrl = normalized.original;
|
||||
result.normalizedUrl = normalized.normalized;
|
||||
result.externalId = normalized.externalId;
|
||||
result.tripContext = tripContext;
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error("Scraping failed:", error);
|
||||
@ -181,36 +38,9 @@ export async function scrapeAirbnbListing(
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================
|
||||
// HTML Fetcher - with better error handling and logging
|
||||
// ============================================
|
||||
|
||||
/**
 * Downloads the HTML document at `url` using browser-like request headers
 * and returns the response body as text.
 *
 * A few debug lines are logged about the fetched document: its length and
 * whether it contains a JSON-LD script type and the word "airbnb".
 *
 * @param url Absolute URL of the page to fetch.
 * @returns The response body.
 * @throws Error with message `HTTP <status> for <url>` when the response
 *         status is not in the ok range.
 */
async function fetchHtml(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
      "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
      "Accept-Encoding": "gzip, deflate, br",
      "Cache-Control": "no-cache",
      "Upgrade-Insecure-Requests": "1",
    },
  });

  if (!response.ok) {
    throw new Error(`HTTP ${response.status} for ${url}`);
  }

  const html = await response.text();

  // Debug info. The size reported is that of the fetched document
  // (it used to print the length of the URL string by mistake).
  console.log(`[Scraper] Fetched ${html.length} chars`);
  console.log(`[Scraper] Contains 'application/ld+json': ${html.includes('application/ld+json')}`);
  console.log(`[Scraper] Contains 'airbnb': ${html.toLowerCase().includes('airbnb')}`);

  return html;
}
|
||||
|
||||
// Re-export utilities for backward compatibility
|
||||
export { normalizeAirbnbUrlWithContext as normalizeAirbnbUrl } from "./url-normalizer";
|
||||
export { extractAirbnbExternalId } from "./url-normalizer";
|
||||
|
||||
// Need to import TripContext for TypeScript
|
||||
import type { TripContext, ExtractedListing } from "./types";
|
||||
|
||||
@ -12,6 +12,7 @@ export interface JsonLdData {
|
||||
cancellationPolicy: string | null;
|
||||
hostName: string | null;
|
||||
amenities: string[];
|
||||
price: number | null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -31,6 +32,7 @@ export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData {
|
||||
cancellationPolicy: null,
|
||||
hostName: null,
|
||||
amenities: [],
|
||||
price: null,
|
||||
};
|
||||
|
||||
const jsonLdScript = $('script[type="application/ld+json"]').html();
|
||||
@ -117,6 +119,15 @@ export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData {
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
// Price - extract from makesOffer.offers[0].price or offers.price
|
||||
const priceValue = jsonData.makesOffer?.offers?.[0]?.price ?? jsonData.offers?.price;
|
||||
if (priceValue !== undefined && priceValue !== null) {
|
||||
const parsed = typeof priceValue === 'number' ? priceValue : parseFloat(String(priceValue));
|
||||
if (!isNaN(parsed)) {
|
||||
result.price = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('Failed to parse JSON-LD:', error);
|
||||
}
|
||||
|
||||
@ -9,11 +9,12 @@ function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number |
|
||||
// Try various price selectors that Airbnb might use
|
||||
const priceSelectors = [
|
||||
'[data-testid="price-amount"]',
|
||||
'[data-testid="book-it-default"] span',
|
||||
'span[class*="Price"]',
|
||||
'span[class*="price"]',
|
||||
'[itemprop="price"]',
|
||||
'._1y6k3r2',
|
||||
'._1dss1omb',
|
||||
'div[class*="bookit"] span',
|
||||
'section[class*="booking"] span',
|
||||
];
|
||||
|
||||
for (const selector of priceSelectors) {
|
||||
@ -33,6 +34,16 @@ function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number |
|
||||
return priceFromHtml;
|
||||
}
|
||||
|
||||
// Fallback: look for "total" near price numbers
|
||||
const totalPattern = /total[^€$£]*[€$£]\s*(\d[\d.,]*)/i;
|
||||
const totalMatch = html.match(totalPattern);
|
||||
if (totalMatch) {
|
||||
const parsed = parseFloat(totalMatch[1].replace(/[.,](?=\d{3})/g, '').replace(',', '.'));
|
||||
if (!isNaN(parsed) && parsed > 0) {
|
||||
return parsed;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@ -2,8 +2,14 @@ import { BedType, SleepingOption } from '../types';
|
||||
|
||||
/**
|
||||
* Bed type configuration: maps text patterns to bed types, spots per unit, and quality
|
||||
*
|
||||
* IMPORTANT: Longer/more specific patterns MUST come before shorter ones
|
||||
* (e.g., "bunk bed" before "bed", "double bed" before "double")
|
||||
*/
|
||||
export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; quality: 'FULL' | 'AUXILIARY' }> = {
|
||||
// Compound bed types (must come first to avoid partial matches)
|
||||
'bunk bed': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||
'etagenbett': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||
'double bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||
'doppelbett': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||
'queen bed': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
||||
@ -11,11 +17,27 @@ export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; qua
|
||||
'single bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
'twin bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
'einzelbett': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
'bunk bed': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||
'etagenbett': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||
'sofa bed': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
||||
'pull-out sofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
||||
'schlafsofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
||||
'murphy bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||
'day bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
|
||||
// Standalone bed types (without "bed" word)
|
||||
'double': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||
'queen': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
||||
'king': { type: 'KING', spots: 2, quality: 'FULL' },
|
||||
'single': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
'twin': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
|
||||
// Plural forms
|
||||
'doubles': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||
'singles': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||
'queens': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
||||
'kings': { type: 'KING', spots: 2, quality: 'FULL' },
|
||||
'bunks': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||
|
||||
// Auxiliary sleeping
|
||||
'couch': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
|
||||
'sofa': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
|
||||
'air mattress': { type: 'AIR_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
||||
@ -24,9 +46,11 @@ export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; qua
|
||||
'extra mattress': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
||||
'zusatzmatratze': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
||||
'futon': { type: 'FUTON', spots: 1, quality: 'AUXILIARY' },
|
||||
'hammock': { type: 'SINGLE', spots: 1, quality: 'AUXILIARY' },
|
||||
};
|
||||
|
||||
// Pattern: "1 double bed" or "2 single beds" or "Bedroom 1: 1 queen bed"
|
||||
// Note: lastIndex is reset before each use to avoid global flag bug
|
||||
const BED_PATTERN = /(?:(?:bedroom|schlafzimmer|room|zimmer)\s*\d*\s*:?\s*)?(\d+)\s+([a-z\s-]+?)(?:\s|$|,|\.)/gi;
|
||||
|
||||
export interface SleepingStats {
|
||||
@ -47,6 +71,9 @@ export function parseSleepingArrangements(text: string): SleepingOption[] {
|
||||
const options: SleepingOption[] = [];
|
||||
const lowerText = text.toLowerCase();
|
||||
|
||||
// Reset lastIndex to avoid bug with global flag + exec() loop
|
||||
BED_PATTERN.lastIndex = 0;
|
||||
|
||||
let match;
|
||||
while ((match = BED_PATTERN.exec(lowerText)) !== null) {
|
||||
const quantity = parseInt(match[1], 10);
|
||||
@ -114,30 +141,78 @@ export function calculateSleepingStats(options: SleepingOption[]): SleepingStats
|
||||
/**
 * Derive sleeping options from the bed count (low-confidence fallback).
 * Used when no detailed sleeping-arrangement text is available.
 *
 * Logic when `guestCount` is known (non-zero):
 * - every bed sleeps at least one guest;
 * - each guest beyond the number of beds turns one bed into a double,
 *   capped at `beds` doubles.
 * The derived capacity is therefore `min(guestCount, 2 * beds)` whenever
 * `guestCount >= beds`, so it never falls below the guest count the listing
 * advertises (e.g. 2 beds for 4 guests → 2 doubles, not 1 double + 1 single).
 *
 * Logic when `guestCount` is unknown (0 / NaN):
 * - a lone bed is treated as a single;
 * - otherwise roughly half of the beds (rounded up) are treated as doubles.
 *
 * @param beds - Number of beds reported for the listing.
 * @param guestCount - Advertised guest capacity; falsy when unknown.
 * @returns Derived options (doubles first, then singles); empty when `beds < 1`.
 */
export function deriveSleepingFromBeds(beds: number, guestCount: number): SleepingOption[] {
  if (!beds || beds < 1) return [];

  let doubleCount: number;
  if (guestCount) {
    // Guests that do not get a bed of their own must share one.
    doubleCount = Math.min(beds, Math.max(0, guestCount - beds));
  } else {
    doubleCount = beds === 1 ? 0 : Math.ceil(beds / 2);
  }
  const singleCount = beds - doubleCount;

  const options: SleepingOption[] = [];

  if (doubleCount > 0) {
    options.push({
      bedType: 'DOUBLE',
      quantity: doubleCount,
      spotsPerUnit: 2,
      quality: 'FULL',
      label: 'Doppelbett (abgeleitet)',
    });
  }

  if (singleCount > 0) {
    options.push({
      bedType: 'SINGLE',
      quantity: singleCount,
      spotsPerUnit: 1,
      quality: 'FULL',
      label: 'Einzelbett (abgeleitet)',
    });
  }

  return options;
}
|
||||
|
||||
419
src/lib/airbnb/puppeteer-scraper.ts
Normal file
419
src/lib/airbnb/puppeteer-scraper.ts
Normal file
@ -0,0 +1,419 @@
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import type { Browser, Page } from 'puppeteer';
|
||||
import * as cheerio from 'cheerio';
|
||||
import {
|
||||
ExtractedListing,
|
||||
FieldSource,
|
||||
field,
|
||||
mergeField,
|
||||
TripContext,
|
||||
PriceStatus,
|
||||
SleepingDataQuality
|
||||
} from './types';
|
||||
import { parseJsonLd } from './parsers/jsonld';
|
||||
import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText, parseTitle } from './parsers/text-patterns';
|
||||
import { extractLocation } from './parsers/location';
|
||||
import { extractPrice } from './parsers/price';
|
||||
import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds, BED_TYPE_CONFIG } from './parsers/sleeping';
|
||||
|
||||
// Enable stealth mode
|
||||
import Stealth from 'puppeteer-extra-plugin-stealth';
|
||||
puppeteer.use(Stealth());
|
||||
|
||||
/**
 * Scrape an Airbnb listing by rendering it in headless Chromium (Puppeteer with
 * the stealth plugin) and running the JSON-LD, text-pattern, location, price
 * and sleeping parsers over the rendered page.
 *
 * @param url - Listing URL to open. It is stored unchanged as both
 *              `originalUrl` and `normalizedUrl` of the result.
 * @param options - Optional trip context (check-in, check-out, adults).
 *                  `adults` falls back to 4 when missing.
 * @returns The extracted listing, or `null` when the page is a 404/410 or any
 *          step throws (the error is logged, not rethrown).
 */
export async function scrapeAirbnbWithPuppeteer(
  url: string,
  options?: { tripContext?: TripContext }
): Promise<ExtractedListing | null> {
  let browser: Browser | null = null;

  try {
    // Launch browser with stealth mode
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
      ],
    });

    const page: Page = await browser.newPage();

    // Set realistic viewport and user agent
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    // Navigate and wait for network idle
    console.log(`[Puppeteer] Navigating to ${url}`);
    const navigationResponse = await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });

    // A missing listing is reported by the HTTP status even when the page
    // title does not mention it, so check the status before the title.
    const httpStatus = navigationResponse?.status();
    if (httpStatus === 404 || httpStatus === 410) {
      console.error(`[Puppeteer] Got HTTP ${httpStatus} page`);
      return null;
    }

    // Wait a bit more for dynamic content
    await new Promise(resolve => setTimeout(resolve, 2000));

    // Check if we got a 404 or challenge page
    const pageTitle = await page.title();
    if (pageTitle.includes('404') || pageTitle.includes('Not Found')) {
      console.error('[Puppeteer] Got 404 page');
      return null;
    }

    console.log(`[Puppeteer] Page title: ${pageTitle}`);

    // Get rendered HTML
    const html = await page.content();
    const $ = cheerio.load(html);

    // Extract visible text for pattern matching
    const visibleText = extractVisibleText(html);
    console.log(`[Puppeteer] Extracted ${visibleText.length} chars of visible text`);

    // Run all parsers
    const jsonldData = parseJsonLd($);
    console.log(`[Puppeteer] JSON-LD: title=${!!jsonldData.title}, images=${jsonldData.images.length}`);

    const capacityFacts = parseCapacityFacts(visibleText);
    console.log(`[Puppeteer] Capacity: ${JSON.stringify(capacityFacts)}`);

    const ratingFacts = parseRating(visibleText);
    const hostName = parseHost(visibleText);
    const maxGuests = parseMaxGuests(visibleText);

    // Try to get sleeping arrangements from the rendered page
    const sleepingOptions = await parseSleepingArrangementsFromPage(page);
    console.log(`[Puppeteer] Sleeping options: ${sleepingOptions.length} found`);

    const tripContext: TripContext = {
      checkIn: options?.tripContext?.checkIn || undefined,
      checkOut: options?.tripContext?.checkOut || undefined,
      adults: options?.tripContext?.adults || 4,
    };

    const priceData = extractPrice(html, $, tripContext);

    // Use JSON-LD price as fallback if price extraction failed
    const jsonLdPrice = jsonldData.price;
    if (jsonLdPrice !== null && priceData.nightly.value === null) {
      priceData.nightly = { value: jsonLdPrice, source: 'jsonld', confidence: 'medium' };
      priceData.status = 'EXTRACTED';

      // Calculate total if trip context available
      if (tripContext.checkIn && tripContext.checkOut) {
        // `new Date()` never throws on bad input; it yields an Invalid Date
        // whose getTime() is NaN, so validate the result instead of catching.
        const checkInMs = new Date(tripContext.checkIn).getTime();
        const checkOutMs = new Date(tripContext.checkOut).getTime();
        const nights = Math.round((checkOutMs - checkInMs) / (1000 * 60 * 60 * 24));
        if (Number.isFinite(nights) && nights > 0) {
          priceData.total = { value: jsonLdPrice * nights, source: 'derived', confidence: 'medium' };
        }
      }
    }

    const locationData = extractLocation($, html);
    const pageTitleParsed = parseTitle(html);

    // Extract images from the rendered page (more reliable)
    const images = extractImagesFromPage($);
    // The DOM extraction only covers og:image and <img> tags. Append the image
    // URLs found in JSON-LD, which were parsed above but otherwise dropped.
    for (const candidate of jsonldData.images) {
      if (typeof candidate === 'string' && candidate.startsWith('http') && !images.includes(candidate)) {
        images.push(candidate);
      }
    }
    console.log(`[Puppeteer] Found ${images.length} images`);

    // Extract description from rendered page
    const description = extractDescriptionFromPage($);

    // Extract amenities if not in JSON-LD
    const amenities = jsonldData.amenities.length > 0
      ? jsonldData.amenities
      : extractAmenitiesFromPage($);
    console.log(`[Puppeteer] Found ${amenities.length} amenities`);

    // Build the result
    const result: ExtractedListing = {
      originalUrl: url,
      normalizedUrl: url,
      externalId: extractExternalId(url),

      // Title - try multiple sources
      title: mergeField(
        jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null,
        pageTitleParsed ? field(pageTitleParsed, 'text_pattern', 'medium') : null
      ),

      description: mergeField(
        jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null,
        description ? field(description, 'dom', 'medium') : null
      ),

      // Location
      locationText: locationData.locationText.value
        ? field(locationData.locationText.value, locationData.locationText.source, locationData.locationText.confidence)
        : field(null, 'derived', 'low'),
      latitude: locationData.latitude,
      longitude: locationData.longitude,

      // Pricing
      tripContext,
      nightlyPrice: priceData.nightly,
      totalPrice: priceData.total,
      priceStatus: priceData.status,

      // Rating
      rating: mergeField(
        ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null,
        jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null
      ),
      reviewCount: mergeField(
        ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null,
        jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null
      ),

      // Capacity
      guestCount: mergeField(
        capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null,
        maxGuests ? field(maxGuests, 'text_pattern', 'medium') : null
      ),
      officialGuestCount: mergeField(
        maxGuests ? field(maxGuests, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),
      bedrooms: mergeField(
        capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),
      beds: mergeField(
        capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),
      bathrooms: mergeField(
        capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),

      // Sleeping (stats are filled in below)
      sleepingOptions,
      maxSleepingPlaces: 0,
      suitableFor4: false,
      extraMattressesNeededFor4: 0,
      sleepingDataQuality: 'UNKNOWN',

      // Host
      hostName: mergeField(
        hostName ? field(hostName, 'text_pattern', 'high') : null,
        jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null
      ),

      // Amenities
      amenities,

      // Images
      images,
      coverImage: images[0] || null,

      // Other
      cancellationPolicy: jsonldData.cancellationPolicy
        ? field(jsonldData.cancellationPolicy, 'jsonld', 'high')
        : field(null, 'derived', 'low'),

      // Debug
      rawSnippets: {
        title: jsonldData.title || pageTitleParsed || '',
        visibleText: visibleText.substring(0, 2000),
      },
      extractionLog: [
        `Puppeteer render: ${url}`,
        `Page title: ${pageTitle}`,
        `Images found: ${images.length}`,
        `Amenities found: ${amenities.length}`,
        `Capacity: ${JSON.stringify(capacityFacts)}`,
      ],
    };

    // Calculate sleeping stats: exact when the page listed beds, otherwise
    // derived from the bed and guest counts when both are known.
    if (sleepingOptions.length > 0) {
      const stats = calculateSleepingStats(sleepingOptions);
      result.maxSleepingPlaces = stats.maxSleepingPlaces;
      result.suitableFor4 = stats.suitableFor4;
      result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
      result.sleepingDataQuality = 'EXACT';
    } else if (result.beds.value && result.guestCount.value) {
      const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value);
      const stats = calculateSleepingStats(derivedOptions);
      result.sleepingOptions = derivedOptions;
      result.maxSleepingPlaces = stats.maxSleepingPlaces;
      result.suitableFor4 = stats.suitableFor4;
      result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
      result.sleepingDataQuality = 'DERIVED';
    }

    return result;

  } catch (error) {
    console.error('[Puppeteer] Scraper error:', error);
    return null;
  } finally {
    if (browser) {
      // A failure while closing must not replace the scrape result (or the
      // `null` returned above) with a rejected promise.
      try {
        await browser.close();
      } catch (closeError) {
        console.error('[Puppeteer] Failed to close browser:', closeError);
      }
    }
  }
}
|
||||
|
||||
/**
 * Extract the numeric Airbnb listing ID from a listing URL.
 *
 * Recognizes both `/rooms/<id>` and `/rooms/plus/<id>` paths; anything after
 * the digits (query string, fragment, further path segments) is ignored.
 *
 * @param url - Listing URL or path.
 * @returns The listing ID as a string of digits, or `null` when the URL
 *          contains no `/rooms/` listing ID.
 */
function extractExternalId(url: string): string | null {
  const match = /\/rooms\/(?:plus\/)?(\d+)/.exec(url);
  return match?.[1] ?? null;
}
|
||||
|
||||
/**
 * Collect image URLs from the rendered listing page.
 *
 * The `og:image` meta content (if any) comes first, followed by every
 * `<img>` found under a photo/image container whose source starts with
 * `http`. Duplicates are dropped while keeping first-seen order.
 *
 * @param $ - Cheerio instance loaded with the rendered HTML.
 * @returns Unique image URLs in discovery order.
 */
function extractImagesFromPage($: cheerio.CheerioAPI): string[] {
  const collected = new Set<string>();

  // Open Graph preview image
  const openGraphUrl = $('meta[property="og:image"]').attr('content');
  if (openGraphUrl) {
    collected.add(openGraphUrl);
  }

  // Gallery images, including lazily loaded ones that keep the URL in a data attribute
  const gallerySelector = '[data-testid*="photo"] img, [data-testid*="image"] img, [class*="photo"] img';
  $(gallerySelector).each((_, imgEl) => {
    const imgNode = $(imgEl);
    const candidate = imgNode.attr('src') || imgNode.attr('data-src') || imgNode.attr('data-image');
    if (!candidate || !candidate.startsWith('http')) {
      return;
    }
    collected.add(candidate);
  });

  return Array.from(collected);
}
|
||||
|
||||
/**
 * Pull the listing description out of the rendered page.
 *
 * Candidate selectors are tried in order; the first one whose trimmed text is
 * longer than 20 characters wins, and the result is cut to 500 characters.
 *
 * @param $ - Cheerio instance loaded with the rendered HTML.
 * @returns Up to 500 characters of description text, or `null` when no
 *          candidate holds more than 20 characters.
 */
function extractDescriptionFromPage($: cheerio.CheerioAPI): string | null {
  const candidateSelectors = [
    '[data-section-id="DESCRIPTION_DEFAULT"]',
    '#description',
    '.description',
    '[itemprop="description"]',
  ];

  for (const candidate of candidateSelectors) {
    const content = $(candidate).text().trim();
    if (content.length <= 20) {
      continue; // too short to be a real description
    }
    return content.slice(0, 500);
  }

  return null;
}
|
||||
|
||||
/**
 * Collect amenity labels from the rendered page.
 *
 * Reads the trimmed text of every element whose `data-testid` contains
 * "amenity", skipping empty labels and repeats.
 *
 * @param $ - Cheerio instance loaded with the rendered HTML.
 * @returns Unique, non-empty amenity labels in document order.
 */
function extractAmenitiesFromPage($: cheerio.CheerioAPI): string[] {
  const labels = new Set<string>();

  $('[data-testid*="amenity"]').each((_, amenityEl) => {
    const label = $(amenityEl).text().trim();
    if (label) {
      labels.add(label);
    }
  });

  return Array.from(labels);
}
|
||||
|
||||
/**
 * Lookup from bed type to sleeping spots per unit, built once from
 * BED_TYPE_CONFIG. Several config entries share a bed type; the first entry
 * encountered for a type decides its spot count.
 */
const BED_TYPE_SPOTS_MAP: Record<string, number> = Object.values(BED_TYPE_CONFIG).reduce(
  (spotsByType: Record<string, number>, entry) => {
    const alreadyKnown = entry.type in spotsByType;
    if (!alreadyKnown) {
      spotsByType[entry.type] = entry.spots;
    }
    return spotsByType;
  },
  {},
);
|
||||
|
||||
/**
 * Parse sleeping arrangements from the rendered Puppeteer page.
 *
 * Reads the text of the `SLEEPING_CONFIGURATION` section and scans it for
 * "<count> [×|x] <bed type>" phrases. Every hit becomes one sleeping option
 * whose spots-per-unit comes from BED_TYPE_SPOTS_MAP (2 when the type is not
 * in the map).
 *
 * @param page - Puppeteer page that has already navigated to the listing.
 * @returns The options found; an empty array when the section is missing,
 *          has no text, or an error occurs (the error is logged).
 */
async function parseSleepingArrangementsFromPage(page: Page): Promise<ExtractedListing['sleepingOptions']> {
  const options: ExtractedListing['sleepingOptions'] = [];

  try {
    // Try to find sleeping/bedroom section
    const sleepingSection = await page.$('[data-section-id="SLEEPING_CONFIGURATION"]');
    if (!sleepingSection) {
      return options;
    }

    // textContent may be null; running the patterns against null would match
    // against the string "null".
    const text = await sleepingSection.evaluate(el => el.textContent);
    if (!text) {
      return options;
    }

    // Every pattern captures the count in group 1 and the bed word in group 2.
    // Without group 2 the item cannot be classified and would be recorded as
    // a full-quality double bed. The regexes are created per call, so their
    // `lastIndex` always starts at 0.
    const bedPatterns = [
      /(\d+)\s*(?:×|x)?\s*(queen|king|single|double|twin|full|king-size|queen-size)\s*bed/gi,
      /(\d+)\s*(?:×|x)?\s*(Futon)/gi,
      /(\d+)\s*(?:×|x)?\s*(Matratze)/gi,
      /(\d+)\s*(?:×|x)?\s*(Couch)/gi,
    ];

    for (const pattern of bedPatterns) {
      let match;
      while ((match = pattern.exec(text)) !== null) {
        const quantity = parseInt(match[1], 10);
        const bedType = match[2] || 'bed';

        // Map German/English bed types to BedType enum
        let normalizedType: import('./types').BedType = 'UNKNOWN';
        let quality: 'FULL' | 'AUXILIARY' = 'AUXILIARY';

        const lower = bedType.toLowerCase();
        if (lower.includes('queen')) {
          normalizedType = 'QUEEN';
          quality = 'FULL';
        } else if (lower.includes('king')) {
          normalizedType = 'KING';
          quality = 'FULL';
        } else if (lower.includes('double') || lower.includes('full')) {
          normalizedType = 'DOUBLE';
          quality = 'FULL';
        } else if (lower.includes('twin') || lower.includes('single')) {
          normalizedType = 'SINGLE';
          quality = 'FULL';
        } else if (lower.includes('futon')) {
          normalizedType = 'FUTON';
          quality = 'AUXILIARY';
        } else if (lower.includes('matratze') || lower.includes('mattress')) {
          normalizedType = 'EXTRA_MATTRESS';
          quality = 'AUXILIARY';
        } else if (lower.includes('couch') || lower.includes('sofa')) {
          normalizedType = 'SOFA';
          quality = 'AUXILIARY';
        } else {
          // Unclassified bed word: keep the previous default of a double bed.
          normalizedType = 'DOUBLE';
          quality = 'FULL';
        }

        options.push({
          bedType: normalizedType,
          quantity,
          spotsPerUnit: BED_TYPE_SPOTS_MAP[normalizedType] ?? 2,
          quality,
        });
      }
    }
  } catch (error) {
    console.error('[Puppeteer] Error parsing sleeping arrangements:', error);
  }

  return options;
}
|
||||
96
test-scraper-debug.ts
Normal file
96
test-scraper-debug.ts
Normal file
@ -0,0 +1,96 @@
|
||||
/**
|
||||
* Debug test - captures more info about what's happening
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
const TEST_URL = 'https://www.airbnb.com/rooms/842937876795894279';
|
||||
|
||||
/**
 * Debug run: open TEST_URL in headless Chromium and print what came back —
 * redirects, final URL, title, HTML/body-text sizes, hints of a challenge
 * page, presence of key listing sections — then save a viewport screenshot
 * to `debug-screenshot.png`.
 *
 * Errors after the browser has launched are logged rather than rethrown, and
 * the browser is always closed.
 */
async function main() {
  console.log('Starting debug test...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      '--window-size=1920,1080',
    ],
  });

  // Everything after the launch runs inside try/finally so the browser is
  // closed even when page setup (not only navigation) fails.
  try {
    const page = await browser.newPage();

    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    console.log(`Navigating to: ${TEST_URL}`);

    // Track redirects
    page.on('response', (response) => {
      const status = response.status();
      if (status >= 300 && status < 400) {
        console.log(`🔄 Redirect: ${status} → ${response.headers()['location']?.substring(0, 100)}`);
      }
    });

    const response = await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    console.log(`\n📊 Response status: ${response?.status()}`);
    console.log(`📊 Final URL: ${page.url()}`);
    console.log(`📊 Page title: ${await page.title()}`);

    // Wait longer for dynamic content
    console.log('\n⏳ Waiting 5 seconds for dynamic content...');
    await new Promise(r => setTimeout(r, 5000));

    // Get page content
    const html = await page.content();
    console.log(`\n📄 HTML length: ${html.length} chars`);

    // Check for challenge page
    if (html.includes('challenge') || html.includes('captcha') || html.includes('blocked')) {
      console.log('⚠️ Possible challenge/blocked page detected!');
    }

    // Check if we're on the homepage
    if (page.url() === 'https://www.airbnb.com/' || page.url() === 'https://www.airbnb.com') {
      console.log('⚠️ Redirected to homepage - likely blocked!');
    }

    // Extract visible text
    const bodyText = await page.evaluate(() => document.body.innerText);
    console.log(`\n📝 Body text length: ${bodyText.length} chars`);
    console.log(`\n📝 First 500 chars of visible text:\n${bodyText.substring(0, 500)}`);

    // Check for specific listing elements
    const hasListingTitle = await page.$('[data-plugin-in-point-id="TITLE_DEFAULT"]');
    const hasPhotos = await page.$('[data-section-id="PHOTO_PICKER"]');
    const hasPrice = await page.$('[data-plugin-in-point-id="PRICE_DEFAULT"]');

    console.log(`\n🔍 Listing elements found:`);
    console.log(` Title section: ${hasListingTitle ? '✅' : '❌'}`);
    console.log(` Photos section: ${hasPhotos ? '✅' : '❌'}`);
    console.log(` Price section: ${hasPrice ? '✅' : '❌'}`);

    // Take a screenshot
    await page.screenshot({ path: 'debug-screenshot.png', fullPage: false });
    console.log(`\n📸 Screenshot saved to: debug-screenshot.png`);

  } catch (error) {
    console.error('Error:', error);
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main();
|
||||
127
test-scraper.ts
Normal file
127
test-scraper.ts
Normal file
@ -0,0 +1,127 @@
|
||||
/**
|
||||
* Test script for Puppeteer-based Airbnb scraper
|
||||
* Run with: npx tsx test-scraper.ts
|
||||
*/
|
||||
|
||||
import { scrapeAirbnbWithPuppeteer } from './src/lib/airbnb/puppeteer-scraper';
|
||||
|
||||
const TEST_URL = 'https://www.airbnb.com/rooms/52367822'; // Valid listing in Bad Bellingen, Germany
|
||||
|
||||
async function main() {
|
||||
console.log('========================================');
|
||||
console.log('Airbnb Puppeteer Scraper Test');
|
||||
console.log('========================================\n');
|
||||
|
||||
console.log(`Testing URL: ${TEST_URL}\n`);
|
||||
console.log('Starting scraper (this may take 30-60 seconds)...\n');
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const result = await scrapeAirbnbWithPuppeteer(TEST_URL);
|
||||
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(`\n✅ Scraping completed in ${elapsed}s\n`);
|
||||
|
||||
if (!result) {
|
||||
console.log('❌ Result is null - scraping may have failed');
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('========================================');
|
||||
console.log('EXTRACTED DATA');
|
||||
console.log('========================================\n');
|
||||
|
||||
// Title
|
||||
console.log('📌 TITLE:');
|
||||
console.log(` Value: ${result.title?.value || 'N/A'}`);
|
||||
console.log(` Source: ${result.title?.source || 'N/A'}`);
|
||||
console.log(` Confidence: ${result.title?.confidence || 'N/A'}\n`);
|
||||
|
||||
// Price
|
||||
console.log('💰 PRICE:');
|
||||
console.log(` Nightly: ${result.nightlyPrice?.value || 'N/A'} EUR`);
|
||||
console.log(` Total: ${result.totalPrice?.value || 'N/A'} EUR`);
|
||||
console.log(` Status: ${result.priceStatus || 'N/A'}\n`);
|
||||
|
||||
// Location
|
||||
console.log('📍 LOCATION:');
|
||||
console.log(` Text: ${result.locationText?.value || 'N/A'}`);
|
||||
console.log(` Lat/Lng: ${result.latitude}, ${result.longitude}\n`);
|
||||
|
||||
// Rating
|
||||
console.log('⭐ RATING:');
|
||||
console.log(` Rating: ${result.rating?.value || 'N/A'}`);
|
||||
console.log(` Reviews: ${result.reviewCount?.value || 'N/A'}\n`);
|
||||
|
||||
// Capacity
|
||||
console.log('🏠 CAPACITY:');
|
||||
console.log(` Guests: ${result.guestCount?.value || 'N/A'}`);
|
||||
console.log(` Bedrooms: ${result.bedrooms?.value || 'N/A'}`);
|
||||
console.log(` Beds: ${result.beds?.value || 'N/A'}`);
|
||||
console.log(` Bathrooms: ${result.bathrooms?.value || 'N/A'}\n`);
|
||||
|
||||
// Sleeping Options
|
||||
console.log('🛏️ SLEEPING OPTIONS:');
|
||||
if (result.sleepingOptions && result.sleepingOptions.length > 0) {
|
||||
result.sleepingOptions.forEach((opt, i) => {
|
||||
console.log(` ${i + 1}. ${opt.quantity}x ${opt.bedType} (${opt.spotsPerUnit} spots, ${opt.quality})`);
|
||||
});
|
||||
console.log(` Max sleeping places: ${result.maxSleepingPlaces}`);
|
||||
console.log(` Suitable for 4: ${result.suitableFor4 ? '✅ Yes' : '❌ No'}`);
|
||||
console.log(` Quality: ${result.sleepingDataQuality}`);
|
||||
} else {
|
||||
console.log(' No sleeping options extracted');
|
||||
}
|
||||
console.log('');
|
||||
|
||||
// Host
|
||||
console.log('👤 HOST:');
|
||||
console.log(` Name: ${result.hostName?.value || 'N/A'}\n`);
|
||||
|
||||
// Images
|
||||
console.log('🖼️ IMAGES:');
|
||||
console.log(` Count: ${result.images?.length || 0}`);
|
||||
if (result.images && result.images.length > 0) {
|
||||
console.log(` First 3:`);
|
||||
result.images.slice(0, 3).forEach((img, i) => {
|
||||
console.log(` ${i + 1}. ${img.substring(0, 80)}...`);
|
||||
});
|
||||
}
|
||||
console.log('');
|
||||
|
||||
// Amenities
|
||||
console.log('✨ AMENITIES:');
|
||||
console.log(` Count: ${result.amenities?.length || 0}`);
|
||||
if (result.amenities && result.amenities.length > 0) {
|
||||
console.log(` First 10: ${result.amenities.slice(0, 10).join(', ')}`);
|
||||
}
|
||||
console.log('');
|
||||
|
||||
// Description
|
||||
console.log('📝 DESCRIPTION:');
|
||||
const desc = result.description?.value || 'N/A';
|
||||
console.log(` ${desc.substring(0, 200)}${desc.length > 200 ? '...' : ''}\n`);
|
||||
|
||||
// External ID
|
||||
console.log('🔗 EXTERNAL ID:');
|
||||
console.log(` ${result.externalId || 'N/A'}\n`);
|
||||
|
||||
// Extraction Log
|
||||
console.log('📋 EXTRACTION LOG:');
|
||||
result.extractionLog?.forEach(log => {
|
||||
console.log(` - ${log}`);
|
||||
});
|
||||
|
||||
console.log('\n========================================');
|
||||
console.log('TEST COMPLETE');
|
||||
console.log('========================================');
|
||||
|
||||
} catch (error) {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(`\n❌ Error after ${elapsed}s:`);
|
||||
console.error(error);
|
||||
}
|
||||
}
|
||||
|
||||
// Script entry point: run main() and log any rejection that escapes it via console.error.
main().catch((reason: unknown) => {
  console.error(reason);
});
|
||||
Loading…
x
Reference in New Issue
Block a user