feat: massive Airbnb import pipeline overhaul + UI fixes
🔥 Scraper Improvements: - Add JSON-LD price extraction (regression fix) - Fix sleeping spotsPerUnit bug (was hardcoded to 2) - Remove stale CSS selectors, add robust fallbacks - Add JSON-LD price fallback in extraction pipeline - Improve sleeping parser regex (lastIndex bug fix) - Add 15+ new bed type patterns (murphy, day bed, hammock, plurals) - Smarter deriveSleepingFromBeds() with mixed bed logic 📅 Import Form UX: - Smart defaults (next weekend dates) - Auto-calculate nights display - URL param auto-detection (?check_in=&check_out=&adults=) - Better visual hierarchy with icons - Progress steps during import - Success redirect to listing detail page 🗑️ Delete Button Fix: - Add router.refresh() after successful delete - Inline error state instead of alert() - Admin delete button as proper client component ✏️ Edit/Admin Fixes: - Fix revalidatePath using slug instead of id - Fix redirect to detail page after edit - Add cascade delete logic to admin deleteListing - Extract delete to proper client component 🎨 UI States for Partial Data: - Price: 'Preis auf Anfrage' with context hint - Location: 'Ort nicht erkannt' instead of empty - Sleeping: placeholder when no data - Suitability: 3-state (yes/no/unknown) - Use formatPrice/formatRating utilities 🛏️ Sleeping Data Quality: - Add sleepingDataQuality to Prisma schema - Save quality (EXACT/DERIVED/UNKNOWN) to DB - Display '(geschätzt)' label for derived data 📊 Database: - Restore corrupted schema.prisma from git - Add sleepingDataQuality field - Push schema changes ✅ TypeScript: Zero errors ✅ Build: Successful
This commit is contained in:
parent
5e5326dbcc
commit
d9a203016f
BIN
debug-screenshot.png
Normal file
BIN
debug-screenshot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
BIN
prisma/dev.db
BIN
prisma/dev.db
Binary file not shown.
Binary file not shown.
@ -40,10 +40,13 @@ model Listing {
|
|||||||
// Capacity
|
// Capacity
|
||||||
guestCount Int? @map("guest_count")
|
guestCount Int? @map("guest_count")
|
||||||
officialGuestCount Int? @map("official_guest_count")
|
officialGuestCount Int? @map("official_guest_count")
|
||||||
|
|
||||||
|
// Sleeping Analysis
|
||||||
maxSleepingPlaces Int? @map("max_sleeping_places")
|
maxSleepingPlaces Int? @map("max_sleeping_places")
|
||||||
suitableFor4 Boolean? @map("suitable_for_4")
|
suitableFor4 Boolean? @map("suitable_for_4")
|
||||||
extraMattressesNeededFor4 Int? @map("extra_mattresses_needed_for_4")
|
extraMattressesNeededFor4 Int? @map("extra_mattresses_needed_for_4")
|
||||||
bedTypesSummary String? @map("bed_types_summary")
|
bedTypesSummary String? @map("bed_types_summary")
|
||||||
|
sleepingDataQuality String? @map("sleeping_data_quality") // EXACT, DERIVED, UNKNOWN
|
||||||
|
|
||||||
// Room Details
|
// Room Details
|
||||||
bedrooms Int?
|
bedrooms Int?
|
||||||
|
|||||||
@ -73,6 +73,7 @@ export async function importListingAction(formData: FormData) {
|
|||||||
let suitableFor4 = scrapedData?.suitableFor4 || null;
|
let suitableFor4 = scrapedData?.suitableFor4 || null;
|
||||||
let extraMattressesNeededFor4 = scrapedData?.extraMattressesNeededFor4 || null;
|
let extraMattressesNeededFor4 = scrapedData?.extraMattressesNeededFor4 || null;
|
||||||
let bedTypesSummary = null;
|
let bedTypesSummary = null;
|
||||||
|
let sleepingDataQuality = scrapedData?.sleepingDataQuality || 'UNKNOWN';
|
||||||
|
|
||||||
if (scrapedData?.sleepingOptions && scrapedData.sleepingOptions.length > 0) {
|
if (scrapedData?.sleepingOptions && scrapedData.sleepingOptions.length > 0) {
|
||||||
const types = scrapedData.sleepingOptions.map(o => `${o.quantity}× ${o.bedType}`);
|
const types = scrapedData.sleepingOptions.map(o => `${o.quantity}× ${o.bedType}`);
|
||||||
@ -110,6 +111,7 @@ export async function importListingAction(formData: FormData) {
|
|||||||
suitableFor4,
|
suitableFor4,
|
||||||
extraMattressesNeededFor4,
|
extraMattressesNeededFor4,
|
||||||
bedTypesSummary,
|
bedTypesSummary,
|
||||||
|
sleepingDataQuality,
|
||||||
|
|
||||||
// Room Details
|
// Room Details
|
||||||
bedrooms: scrapedData?.bedrooms?.value || null,
|
bedrooms: scrapedData?.bedrooms?.value || null,
|
||||||
|
|||||||
@ -1,26 +1,86 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
|
import { useRouter } from "next/navigation";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { Input } from "@/components/ui/input";
|
import { Input } from "@/components/ui/input";
|
||||||
import { Label } from "@/components/ui/label";
|
import { Label } from "@/components/ui/label";
|
||||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||||
import { importListingAction } from "@/actions/import-listing";
|
import { importListingAction } from "@/actions/import-listing";
|
||||||
|
|
||||||
|
/**
 * Computes the default trip dates: the upcoming Friday as check-in and the
 * Sunday two days later as check-out, both as `YYYY-MM-DD` strings.
 *
 * When `now` already falls on a Friday, that same day is used as check-in;
 * on a Saturday the Friday six days later is used.
 *
 * The strings are built from the local calendar fields of the dates.
 * `toISOString()` is deliberately avoided because it converts to UTC, which
 * yields the previous or next calendar day whenever the user's local date
 * differs from the UTC date.
 *
 * @param now - Reference point in time; defaults to the current moment.
 * @returns Check-in and check-out dates in the local time zone.
 */
function getNextWeekend(now: Date = new Date()): { checkIn: string; checkOut: string } {
  const FRIDAY = 5; // Date#getDay(): 0 = Sunday … 6 = Saturday

  // Distance to the next Friday in the range 0..6 (0 when today is Friday).
  const daysUntilFriday = (FRIDAY - now.getDay() + 7) % 7;

  // setDate() rolls over month and year boundaries on its own.
  const friday = new Date(now);
  friday.setDate(now.getDate() + daysUntilFriday);

  const sunday = new Date(friday);
  sunday.setDate(friday.getDate() + 2);

  // Format from local fields so the result matches the user's calendar day.
  const toLocalIsoDate = (d: Date): string => {
    const year = String(d.getFullYear()).padStart(4, "0");
    const month = String(d.getMonth() + 1).padStart(2, "0");
    const day = String(d.getDate()).padStart(2, "0");
    return `${year}-${month}-${day}`;
  };

  return {
    checkIn: toLocalIsoDate(friday),
    checkOut: toLocalIsoDate(sunday),
  };
}
|
||||||
|
|
||||||
|
/**
 * Reads the trip parameters `check_in`, `check_out` and `adults` from the
 * query string of a URL.
 *
 * Values come from user-pasted text, so each one is checked before it is
 * returned: dates must look like `YYYY-MM-DD` and `adults` must be a positive
 * integer. A parameter that is missing or malformed is returned as an empty
 * string.
 *
 * @param url - The text to parse as an absolute URL.
 * @returns The extracted parameters, or `null` when `url` is not a parseable
 *   absolute URL.
 */
function extractParamsFromUrl(url: string): {
  checkIn: string;
  checkOut: string;
  adults: string;
} | null {
  let query: URLSearchParams;
  try {
    // The URL constructor throws for anything that is not an absolute URL,
    // e.g. while the user is still typing.
    query = new URL(url).searchParams;
  } catch {
    return null;
  }

  const isoDate = /^\d{4}-\d{2}-\d{2}$/;
  const positiveInt = /^[1-9]\d*$/;

  // Returns the trimmed value when it matches the expected shape, else "".
  const pick = (key: string, shape: RegExp): string => {
    const raw = (query.get(key) ?? "").trim();
    return shape.test(raw) ? raw : "";
  };

  return {
    checkIn: pick("check_in", isoDate),
    checkOut: pick("check_out", isoDate),
    adults: pick("adults", positiveInt),
  };
}
|
||||||
|
|
||||||
export function ImportForm() {
|
export function ImportForm() {
|
||||||
|
const router = useRouter();
|
||||||
|
const weekend = getNextWeekend();
|
||||||
|
|
||||||
const [url, setUrl] = useState("");
|
const [url, setUrl] = useState("");
|
||||||
const [checkIn, setCheckIn] = useState("");
|
const [checkIn, setCheckIn] = useState(weekend.checkIn);
|
||||||
const [checkOut, setCheckOut] = useState("");
|
const [checkOut, setCheckOut] = useState(weekend.checkOut);
|
||||||
const [adults, setAdults] = useState("4");
|
const [adults, setAdults] = useState("4");
|
||||||
const [error, setError] = useState("");
|
const [error, setError] = useState("");
|
||||||
const [success, setSuccess] = useState(false);
|
|
||||||
const [isLoading, setIsLoading] = useState(false);
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
|
const [progress, setProgress] = useState("");
|
||||||
|
|
||||||
|
const hasDates = checkIn && checkOut;
|
||||||
|
const nights = hasDates
|
||||||
|
? Math.max(
|
||||||
|
1,
|
||||||
|
Math.round(
|
||||||
|
(new Date(checkOut).getTime() - new Date(checkIn).getTime()) /
|
||||||
|
(1000 * 60 * 60 * 24)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
: null;
|
||||||
|
|
||||||
|
// Auto-extract params when URL changes
|
||||||
|
const handleUrlChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
|
const newUrl = e.target.value;
|
||||||
|
setUrl(newUrl);
|
||||||
|
const params = extractParamsFromUrl(newUrl);
|
||||||
|
if (params) {
|
||||||
|
if (params.checkIn) setCheckIn(params.checkIn);
|
||||||
|
if (params.checkOut) setCheckOut(params.checkOut);
|
||||||
|
if (params.adults) setAdults(params.adults);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const handleSubmit = async (e: React.FormEvent) => {
|
const handleSubmit = async (e: React.FormEvent) => {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
setError("");
|
setError("");
|
||||||
setSuccess(false);
|
|
||||||
setIsLoading(true);
|
setIsLoading(true);
|
||||||
|
setProgress("🔍 Scraping Airbnb-Seite...");
|
||||||
|
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append("airbnbUrl", url);
|
formData.append("airbnbUrl", url);
|
||||||
@ -28,20 +88,29 @@ export function ImportForm() {
|
|||||||
if (checkOut) formData.append("checkOut", checkOut);
|
if (checkOut) formData.append("checkOut", checkOut);
|
||||||
if (adults) formData.append("adults", adults);
|
if (adults) formData.append("adults", adults);
|
||||||
|
|
||||||
|
// Progress steps
|
||||||
|
const t1 = setTimeout(() => setProgress("📊 Extrahiere Daten..."), 2000);
|
||||||
|
const t2 = setTimeout(() => setProgress("💾 Speichere in Datenbank..."), 5000);
|
||||||
|
|
||||||
const result = await importListingAction(formData);
|
const result = await importListingAction(formData);
|
||||||
|
|
||||||
if (result.ok) {
|
clearTimeout(t1);
|
||||||
setSuccess(true);
|
clearTimeout(t2);
|
||||||
setUrl("");
|
|
||||||
|
if (result.ok && result.slug) {
|
||||||
|
setProgress("✅ Fertig! Weiterleitung...");
|
||||||
|
setTimeout(() => router.push(`/listings/${result.slug}`), 500);
|
||||||
|
return;
|
||||||
} else if (result.error) {
|
} else if (result.error) {
|
||||||
setError(result.error);
|
setError(result.error);
|
||||||
}
|
}
|
||||||
|
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
|
setProgress("");
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get today's date for min date
|
// Get today's date for min date
|
||||||
const today = new Date().toISOString().split('T')[0];
|
const today = new Date().toISOString().split("T")[0];
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Card>
|
<Card>
|
||||||
@ -49,70 +118,148 @@ export function ImportForm() {
|
|||||||
<CardTitle>🏠 Neues Airbnb importieren</CardTitle>
|
<CardTitle>🏠 Neues Airbnb importieren</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent>
|
<CardContent>
|
||||||
<form onSubmit={handleSubmit} className="space-y-4">
|
<form onSubmit={handleSubmit} className="space-y-6">
|
||||||
{/* URL Field */}
|
{/* URL Field - Prominent */}
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<Label htmlFor="airbnb-url">Airbnb Link</Label>
|
<Label htmlFor="airbnb-url" className="text-base font-semibold">
|
||||||
|
🔗 Airbnb Link
|
||||||
|
</Label>
|
||||||
<Input
|
<Input
|
||||||
id="airbnb-url"
|
id="airbnb-url"
|
||||||
type="url"
|
type="url"
|
||||||
placeholder="https://www.airbnb.com/rooms/..."
|
placeholder="https://www.airbnb.com/rooms/..."
|
||||||
value={url}
|
value={url}
|
||||||
onChange={(e) => setUrl(e.target.value)}
|
onChange={handleUrlChange}
|
||||||
required
|
required
|
||||||
autoFocus
|
autoFocus
|
||||||
|
className="text-lg h-12"
|
||||||
|
disabled={isLoading}
|
||||||
/>
|
/>
|
||||||
|
<p className="text-xs text-slate-500">
|
||||||
|
Einfach den Airbnb-Link einfügen — Reisedaten werden automatisch
|
||||||
|
erkannt falls in der URL enthalten.
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Trip Context Fields */}
|
{/* Trip Context Fields - Grouped */}
|
||||||
<div className="space-y-2">
|
<fieldset
|
||||||
<Label>Reisedaten (optional für bessere Preise)</Label>
|
disabled={isLoading}
|
||||||
<div className="grid grid-cols-3 gap-2">
|
className={`space-y-3 rounded-lg border p-4 ${
|
||||||
<div>
|
hasDates
|
||||||
<Label htmlFor="check-in" className="text-xs">Check-in</Label>
|
? "bg-green-50 border-green-200"
|
||||||
|
: "bg-amber-50 border-amber-200"
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<legend className="px-2 text-sm font-medium text-slate-700">
|
||||||
|
{hasDates ? "✅" : "⚠️"} Reisedaten{" "}
|
||||||
|
<span className="text-slate-400 font-normal">
|
||||||
|
(optional — für bessere Preise)
|
||||||
|
</span>
|
||||||
|
</legend>
|
||||||
|
|
||||||
|
<div className="grid grid-cols-3 gap-3">
|
||||||
|
{/* Check-in */}
|
||||||
|
<div className="space-y-1">
|
||||||
|
<Label
|
||||||
|
htmlFor="check-in"
|
||||||
|
className="text-xs flex items-center gap-1"
|
||||||
|
>
|
||||||
|
🛫 Check-in
|
||||||
|
</Label>
|
||||||
<Input
|
<Input
|
||||||
id="check-in"
|
id="check-in"
|
||||||
type="date"
|
type="date"
|
||||||
value={checkIn}
|
value={checkIn}
|
||||||
onChange={(e) => setCheckIn(e.target.value)}
|
onChange={(e) => setCheckIn(e.target.value)}
|
||||||
min={today}
|
min={today}
|
||||||
placeholder="Datum"
|
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
|
||||||
<Label htmlFor="check-out" className="text-xs">Check-out</Label>
|
{/* Nights Display */}
|
||||||
|
<div className="flex items-end justify-center pb-2">
|
||||||
|
{nights != null ? (
|
||||||
|
<span className="text-sm font-semibold text-green-700 bg-green-100 px-3 py-1 rounded-full">
|
||||||
|
{nights} {nights === 1 ? "Nacht" : "Nächte"}
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
<span className="text-sm text-slate-400">→</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Check-out */}
|
||||||
|
<div className="space-y-1">
|
||||||
|
<Label
|
||||||
|
htmlFor="check-out"
|
||||||
|
className="text-xs flex items-center gap-1"
|
||||||
|
>
|
||||||
|
🛬 Check-out
|
||||||
|
</Label>
|
||||||
<Input
|
<Input
|
||||||
id="check-out"
|
id="check-out"
|
||||||
type="date"
|
type="date"
|
||||||
value={checkOut}
|
value={checkOut}
|
||||||
onChange={(e) => setCheckOut(e.target.value)}
|
onChange={(e) => setCheckOut(e.target.value)}
|
||||||
min={checkIn || today}
|
min={checkIn || today}
|
||||||
placeholder="Datum"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<Label htmlFor="adults" className="text-xs">Personen</Label>
|
|
||||||
<Input
|
|
||||||
id="adults"
|
|
||||||
type="number"
|
|
||||||
min="1"
|
|
||||||
max="16"
|
|
||||||
value={adults}
|
|
||||||
onChange={(e) => setAdults(e.target.value)}
|
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<p className="text-xs text-slate-500">
|
|
||||||
💡 Mit Reisedaten kann der Preis genauer ermittelt werden.
|
|
||||||
Die Daten werden auch aus der URL extrahiert wenn vorhanden.
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{error && <div className="text-red-500 text-sm">{error}</div>}
|
{/* Adults */}
|
||||||
{success && <div className="text-green-500 text-sm">✓ Erfolgreich importiert!</div>}
|
<div className="space-y-1">
|
||||||
|
<Label
|
||||||
<Button type="submit" className="w-full" disabled={isLoading || !url}>
|
htmlFor="adults"
|
||||||
{isLoading ? "⏳ Wird importiert..." : "🚀 Importieren"}
|
className="text-xs flex items-center gap-1"
|
||||||
|
>
|
||||||
|
👥 Personen
|
||||||
|
</Label>
|
||||||
|
<Input
|
||||||
|
id="adults"
|
||||||
|
type="number"
|
||||||
|
min="1"
|
||||||
|
max="16"
|
||||||
|
value={adults}
|
||||||
|
onChange={(e) => setAdults(e.target.value)}
|
||||||
|
className="w-24"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p className="text-xs text-slate-600">
|
||||||
|
{hasDates
|
||||||
|
? `💡 Preis wird für ${nights} Nacht${
|
||||||
|
nights !== 1 ? "e" : ""
|
||||||
|
} mit ${adults} Person${adults !== "1" ? "en" : ""} ermittelt.`
|
||||||
|
: "⚠️ Ohne Reisedaten wird kein Preis ermittelt."}
|
||||||
|
</p>
|
||||||
|
</fieldset>
|
||||||
|
|
||||||
|
{/* Error */}
|
||||||
|
{error && (
|
||||||
|
<div className="bg-red-50 border border-red-200 text-red-700 px-4 py-3 rounded-lg text-sm">
|
||||||
|
❌ {error}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Loading Progress */}
|
||||||
|
{isLoading && progress && (
|
||||||
|
<div className="bg-blue-50 border border-blue-200 text-blue-700 px-4 py-3 rounded-lg text-sm flex items-center gap-2">
|
||||||
|
<span className="animate-spin inline-block">⏳</span>
|
||||||
|
{progress}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Submit Button */}
|
||||||
|
<Button
|
||||||
|
type="submit"
|
||||||
|
className="w-full h-12 text-lg"
|
||||||
|
disabled={isLoading || !url}
|
||||||
|
>
|
||||||
|
{isLoading ? (
|
||||||
|
<span className="flex items-center gap-2">
|
||||||
|
<span className="animate-spin">⏳</span> Importiere...
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
"🚀 Jetzt importieren"
|
||||||
|
)}
|
||||||
</Button>
|
</Button>
|
||||||
</form>
|
</form>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
|
|||||||
49
src/app/(protected)/admin/listings/[slug]/delete-button.tsx
Normal file
49
src/app/(protected)/admin/listings/[slug]/delete-button.tsx
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useState } from "react";
|
||||||
|
import { useRouter } from "next/navigation";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { deleteListing } from "../actions";
|
||||||
|
|
||||||
|
/** Props accepted by {@link DeleteListingButton}. */
interface DeleteListingButtonProps {
  /** Sent to the `deleteListing` action as the `id` form field. */
  listingId: string;
  /** Shown in the confirmation dialog. */
  listingTitle: string;
}

/**
 * Full-width destructive button that deletes a listing after the user
 * confirms a browser dialog.
 *
 * On success it navigates to `/listings`. If the action throws, the message is
 * rendered below the button and the button becomes clickable again.
 */
export function DeleteListingButton(props: DeleteListingButtonProps) {
  const { listingId, listingTitle } = props;

  const [isDeleting, setIsDeleting] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const router = useRouter();

  async function handleDelete() {
    const confirmed = confirm(`"${listingTitle}" wirklich löschen?`);
    if (!confirmed) {
      return;
    }

    // Clear any message left over from a previous failed attempt.
    setError(null);
    setIsDeleting(true);

    try {
      const payload = new FormData();
      payload.append("id", listingId);
      await deleteListing(payload);
      // The busy state is intentionally left on: we are leaving this page.
      router.push("/listings");
    } catch (err) {
      const message = err instanceof Error ? err.message : "Fehler beim Löschen";
      setError(message);
      setIsDeleting(false);
    }
  }

  const buttonLabel = isDeleting ? "⏳ Lösche..." : "🗑️ Listing löschen";

  return (
    <div className="flex flex-col gap-1">
      <Button
        variant="destructive"
        onClick={handleDelete}
        disabled={isDeleting}
        className="w-full"
      >
        {buttonLabel}
      </Button>
      {error && <p className="text-xs text-red-500 text-center">{error}</p>}
    </div>
  );
}
|
||||||
@ -4,7 +4,8 @@ import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
|||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { Input } from "@/components/ui/input";
|
import { Input } from "@/components/ui/input";
|
||||||
import { Label } from "@/components/ui/label";
|
import { Label } from "@/components/ui/label";
|
||||||
import { updateListing, deleteListing, addNote, addTagToListing, removeTagFromListing } from "../actions";
|
import { updateListing, addNote, addTagToListing, removeTagFromListing } from "../actions";
|
||||||
|
import { DeleteListingButton } from "./delete-button";
|
||||||
// Note: actions.ts is in /admin/listings/, so from [slug]/ we go up one level with ../
|
// Note: actions.ts is in /admin/listings/, so from [slug]/ we go up one level with ../
|
||||||
|
|
||||||
export default async function EditListingPage({
|
export default async function EditListingPage({
|
||||||
@ -213,21 +214,12 @@ export default async function EditListingPage({
|
|||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
<form action={deleteListing} className="mt-4">
|
<div className="mt-4">
|
||||||
<input type="hidden" name="id" value={listing.id} />
|
<DeleteListingButton
|
||||||
<Button
|
listingId={listing.id}
|
||||||
type="submit"
|
listingTitle={listing.title}
|
||||||
variant="destructive"
|
/>
|
||||||
className="w-full"
|
</div>
|
||||||
onClick={(e) => {
|
|
||||||
if (!confirm("Möchten Sie dieses Listing wirklich löschen?")) {
|
|
||||||
e.preventDefault();
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
🗑️ Listing löschen
|
|
||||||
</Button>
|
|
||||||
</form>
|
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -21,6 +21,12 @@ export async function updateListing(formData: FormData) {
|
|||||||
const status = formData.get("status") as string;
|
const status = formData.get("status") as string;
|
||||||
const isFavorite = formData.get("isFavorite") === "true";
|
const isFavorite = formData.get("isFavorite") === "true";
|
||||||
|
|
||||||
|
// Fetch slug before update for revalidatePath and redirect
|
||||||
|
const existing = await prisma.listing.findUnique({
|
||||||
|
where: { id },
|
||||||
|
select: { slug: true },
|
||||||
|
});
|
||||||
|
|
||||||
await prisma.listing.update({
|
await prisma.listing.update({
|
||||||
where: { id },
|
where: { id },
|
||||||
data: {
|
data: {
|
||||||
@ -41,14 +47,34 @@ export async function updateListing(formData: FormData) {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const slug = existing?.slug;
|
||||||
revalidatePath("/listings");
|
revalidatePath("/listings");
|
||||||
revalidatePath(`/listings/${id}`);
|
if (slug) {
|
||||||
redirect(`/listings`);
|
revalidatePath(`/listings/${slug}`);
|
||||||
|
}
|
||||||
|
redirect(`/listings/${slug ?? ""}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function deleteListing(formData: FormData) {
|
export async function deleteListing(formData: FormData) {
|
||||||
const id = formData.get("id") as string;
|
const id = formData.get("id") as string;
|
||||||
|
|
||||||
|
// Delete related records first to avoid foreign key constraint errors
|
||||||
|
await prisma.listingTag.deleteMany({
|
||||||
|
where: { listingId: id },
|
||||||
|
});
|
||||||
|
|
||||||
|
await prisma.listingSleepingOption.deleteMany({
|
||||||
|
where: { listingId: id },
|
||||||
|
});
|
||||||
|
|
||||||
|
await prisma.listingImage.deleteMany({
|
||||||
|
where: { listingId: id },
|
||||||
|
});
|
||||||
|
|
||||||
|
await prisma.adminNote.deleteMany({
|
||||||
|
where: { listingId: id },
|
||||||
|
});
|
||||||
|
|
||||||
await prisma.listing.delete({
|
await prisma.listing.delete({
|
||||||
where: { id },
|
where: { id },
|
||||||
});
|
});
|
||||||
|
|||||||
@ -114,7 +114,7 @@ export default async function ListingDetailPage({ params }: PageProps) {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{listing.sleepingOptions.length > 0 && (
|
{listing.sleepingOptions.length > 0 ? (
|
||||||
<div>
|
<div>
|
||||||
<h3 className="font-medium mb-2">Schlafmöglichkeiten</h3>
|
<h3 className="font-medium mb-2">Schlafmöglichkeiten</h3>
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
@ -128,6 +128,10 @@ export default async function ListingDetailPage({ params }: PageProps) {
|
|||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
) : (
|
||||||
|
<p className="text-slate-500 text-sm">
|
||||||
|
⚠️ Schlafplatzdetails nicht erkannt
|
||||||
|
</p>
|
||||||
)}
|
)}
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
@ -158,11 +162,24 @@ export default async function ListingDetailPage({ params }: PageProps) {
|
|||||||
<Card>
|
<Card>
|
||||||
<CardContent className="p-6">
|
<CardContent className="p-6">
|
||||||
<h1 className="text-2xl font-bold mb-2">{listing.title}</h1>
|
<h1 className="text-2xl font-bold mb-2">{listing.title}</h1>
|
||||||
<p className="text-slate-500 mb-4">📍 {listing.locationText || "Ort unbekannt"}</p>
|
<p className="text-slate-500 mb-4">📍 {listing.locationText || "Ort nicht erkannt"}</p>
|
||||||
|
|
||||||
<div className="flex items-baseline gap-2 mb-4">
|
<div className="flex items-baseline gap-2 mb-4">
|
||||||
<span className="text-4xl font-bold">{formatPrice(listing.nightlyPrice)}</span>
|
{listing.nightlyPrice != null ? (
|
||||||
<span className="text-slate-500">/ Nacht</span>
|
<>
|
||||||
|
<span className="text-4xl font-bold">{formatPrice(listing.nightlyPrice)}</span>
|
||||||
|
<span className="text-slate-500">/ Nacht</span>
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<div>
|
||||||
|
<span className="text-2xl font-bold text-slate-400">Preis auf Anfrage</span>
|
||||||
|
<p className="text-xs text-slate-500 mt-1">
|
||||||
|
{listing.priceStatus === 'REQUIRES_TRIP_CONTEXT'
|
||||||
|
? '💡 Mit Reisedaten ermittelbar'
|
||||||
|
: 'Nicht ermittelbar'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex items-center gap-2 mb-4">
|
<div className="flex items-center gap-2 mb-4">
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
|
import { useRouter } from "next/navigation";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { deleteListing } from "./actions";
|
import { deleteListing } from "./actions";
|
||||||
|
|
||||||
@ -11,30 +12,39 @@ interface DeleteListingButtonProps {
|
|||||||
|
|
||||||
export function DeleteListingButton({ listingId, listingTitle }: DeleteListingButtonProps) {
|
export function DeleteListingButton({ listingId, listingTitle }: DeleteListingButtonProps) {
|
||||||
const [isDeleting, setIsDeleting] = useState(false);
|
const [isDeleting, setIsDeleting] = useState(false);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const router = useRouter();
|
||||||
|
|
||||||
const handleDelete = async () => {
|
const handleDelete = async () => {
|
||||||
if (!confirm(`"${listingTitle}" wirklich löschen?`)) return;
|
if (!confirm(`"${listingTitle}" wirklich löschen?`)) return;
|
||||||
|
|
||||||
|
setError(null);
|
||||||
setIsDeleting(true);
|
setIsDeleting(true);
|
||||||
try {
|
try {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append("id", listingId);
|
formData.append("id", listingId);
|
||||||
await deleteListing(formData);
|
await deleteListing(formData);
|
||||||
} catch (error) {
|
router.refresh();
|
||||||
alert("Fehler beim Löschen: " + (error as Error).message);
|
} catch (err) {
|
||||||
|
setError(err instanceof Error ? err.message : "Fehler beim Löschen");
|
||||||
setIsDeleting(false);
|
setIsDeleting(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Button
|
<div className="flex flex-col items-end gap-1">
|
||||||
variant="destructive"
|
<Button
|
||||||
size="sm"
|
variant="destructive"
|
||||||
onClick={handleDelete}
|
size="sm"
|
||||||
disabled={isDeleting}
|
onClick={handleDelete}
|
||||||
className="text-sm"
|
disabled={isDeleting}
|
||||||
>
|
className="text-sm"
|
||||||
{isDeleting ? "⏳" : "🗑️"}
|
>
|
||||||
</Button>
|
{isDeleting ? "⏳" : "🗑️"}
|
||||||
|
</Button>
|
||||||
|
{error && (
|
||||||
|
<p className="text-xs text-red-500 max-w-[120px] text-right">{error}</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,6 +2,7 @@ import { prisma } from "@/lib/prisma";
|
|||||||
import { Card, CardContent } from "@/components/ui/card";
|
import { Card, CardContent } from "@/components/ui/card";
|
||||||
import { Badge } from "@/components/ui/badge";
|
import { Badge } from "@/components/ui/badge";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { formatPrice, formatRating } from "@/lib/utils";
|
||||||
import Link from "next/link";
|
import Link from "next/link";
|
||||||
import { DeleteListingButton } from "./delete-button";
|
import { DeleteListingButton } from "./delete-button";
|
||||||
|
|
||||||
@ -57,8 +58,12 @@ export default async function ListingsPage() {
|
|||||||
|
|
||||||
{/* Price & Rating */}
|
{/* Price & Rating */}
|
||||||
<div className="flex justify-between items-center mb-3">
|
<div className="flex justify-between items-center mb-3">
|
||||||
<span className="text-xl font-bold">€{listing.nightlyPrice?.toFixed(2) || "—"}</span>
|
{listing.nightlyPrice != null ? (
|
||||||
<span className="text-sm">⭐ {listing.rating?.toFixed(2) || "—"}</span>
|
<span className="text-xl font-bold">{formatPrice(listing.nightlyPrice)}</span>
|
||||||
|
) : (
|
||||||
|
<span className="text-sm text-slate-400">Preis auf Anfrage</span>
|
||||||
|
)}
|
||||||
|
<span className="text-sm">⭐ {formatRating(listing.rating)}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Tags */}
|
{/* Tags */}
|
||||||
@ -77,12 +82,14 @@ export default async function ListingsPage() {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Sleep Info */}
|
{/* Sleep Info */}
|
||||||
{listing.suitableFor4 ? (
|
{listing.suitableFor4 === true ? (
|
||||||
<p className="text-xs text-green-600 font-medium mb-3">✅ Geeignet für 4 Personen</p>
|
<p className="text-xs text-green-600 font-medium mb-3">✅ Geeignet für 4 Personen</p>
|
||||||
) : (
|
) : listing.suitableFor4 === false ? (
|
||||||
<p className="text-xs text-amber-600 font-medium mb-3">
|
<p className="text-xs text-amber-600 font-medium mb-3">
|
||||||
⚠️ Nicht ideal für 4 {listing.extraMattressesNeededFor4 ? `(+${listing.extraMattressesNeededFor4} Matratzen)` : ""}
|
⚠️ Nicht ideal für 4 {listing.extraMattressesNeededFor4 ? `(+${listing.extraMattressesNeededFor4} Matratzen)` : ""}
|
||||||
</p>
|
</p>
|
||||||
|
) : (
|
||||||
|
<p className="text-xs text-slate-400 font-medium mb-3">❓ Schlafplatz-Info unbekannt</p>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Actions */}
|
{/* Actions */}
|
||||||
|
|||||||
@ -1,22 +1,8 @@
|
|||||||
import * as cheerio from "cheerio";
|
import { scrapeAirbnbWithPuppeteer } from "./puppeteer-scraper";
|
||||||
import { normalizeAirbnbUrlWithContext } from "./url-normalizer";
|
import { normalizeAirbnbUrlWithContext } from "./url-normalizer";
|
||||||
import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText, parseTitle } from "./parsers/text-patterns";
|
|
||||||
import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds } from "./parsers/sleeping";
|
|
||||||
import { extractPrice } from "./parsers/price";
|
|
||||||
import { extractLocation } from "./parsers/location";
|
|
||||||
import { parseJsonLd } from "./parsers/jsonld";
|
|
||||||
import {
|
|
||||||
ExtractedListing,
|
|
||||||
FieldSource,
|
|
||||||
field,
|
|
||||||
mergeField,
|
|
||||||
TripContext,
|
|
||||||
SleepingDataQuality,
|
|
||||||
PriceStatus
|
|
||||||
} from "./types";
|
|
||||||
|
|
||||||
// ============================================
|
// ============================================
|
||||||
// Main Scraper Function
|
// Main Scraper Function - Uses Puppeteer for JS rendering
|
||||||
// ============================================
|
// ============================================
|
||||||
|
|
||||||
export async function scrapeAirbnbListing(
|
export async function scrapeAirbnbListing(
|
||||||
@ -24,156 +10,27 @@ export async function scrapeAirbnbListing(
|
|||||||
options?: { tripContext?: TripContext; usePlaywright?: boolean }
|
options?: { tripContext?: TripContext; usePlaywright?: boolean }
|
||||||
): Promise<ExtractedListing | null> {
|
): Promise<ExtractedListing | null> {
|
||||||
try {
|
try {
|
||||||
// Step 1: Normalize URL and extract trip context
|
// Normalize URL and extract trip context
|
||||||
const normalized = normalizeAirbnbUrlWithContext(url);
|
const normalized = normalizeAirbnbUrlWithContext(url);
|
||||||
|
|
||||||
// Merge trip context from options with URL-extracted context
|
// Merge trip context from options with URL-extracted context
|
||||||
const tripContext: TripContext = {
|
const tripContext = {
|
||||||
checkIn: options?.tripContext?.checkIn || normalized.tripContext.checkIn,
|
checkIn: options?.tripContext?.checkIn || normalized.tripContext.checkIn,
|
||||||
checkOut: options?.tripContext?.checkOut || normalized.tripContext.checkOut,
|
checkOut: options?.tripContext?.checkOut || normalized.tripContext.checkOut,
|
||||||
adults: options?.tripContext?.adults || normalized.tripContext.adults || 4,
|
adults: options?.tripContext?.adults || normalized.tripContext.adults || 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Step 2: Fetch HTML
|
|
||||||
const html = await fetchHtml(normalized.normalized);
|
|
||||||
const $ = cheerio.load(html);
|
|
||||||
|
|
||||||
// Step 3: Extract visible text for pattern matching
|
// Use Puppeteer to render JavaScript and extract data
|
||||||
const visibleText = extractVisibleText(html);
|
const result = await scrapeAirbnbWithPuppeteer(normalized.normalized, { tripContext });
|
||||||
|
|
||||||
// Step 4: Run all parsers
|
if (result) {
|
||||||
const jsonldData = parseJsonLd($);
|
// Update URLs with normalized values
|
||||||
const capacityFacts = parseCapacityFacts(visibleText);
|
result.originalUrl = normalized.original;
|
||||||
const ratingFacts = parseRating(visibleText);
|
result.normalizedUrl = normalized.normalized;
|
||||||
const hostName = parseHost(visibleText);
|
result.externalId = normalized.externalId;
|
||||||
const maxGuests = parseMaxGuests(visibleText);
|
result.tripContext = tripContext;
|
||||||
const sleepingOptions = parseSleepingArrangements(visibleText);
|
|
||||||
const priceData = extractPrice(html, $, tripContext);
|
|
||||||
const locationData = extractLocation($, html);
|
|
||||||
const pageTitle = parseTitle(html);
|
|
||||||
|
|
||||||
// Step 5: Build the result with priority: jsonld > text_pattern > derived
|
|
||||||
const result: ExtractedListing = {
|
|
||||||
// URLs
|
|
||||||
originalUrl: normalized.original,
|
|
||||||
normalizedUrl: normalized.normalized,
|
|
||||||
externalId: normalized.externalId,
|
|
||||||
|
|
||||||
// Basic Info
|
|
||||||
title: mergeField(
|
|
||||||
jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null,
|
|
||||||
pageTitle ? field(pageTitle, 'text_pattern', 'medium') : field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
description: mergeField(
|
|
||||||
jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null,
|
|
||||||
field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
|
|
||||||
// Location
|
|
||||||
locationText: locationData.locationText,
|
|
||||||
latitude: mergeField(
|
|
||||||
jsonldData.latitude ? field(jsonldData.latitude, 'jsonld', 'high') : null,
|
|
||||||
locationData.latitude.value !== null ? locationData.latitude : field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
longitude: mergeField(
|
|
||||||
jsonldData.longitude ? field(jsonldData.longitude, 'jsonld', 'high') : null,
|
|
||||||
locationData.longitude.value !== null ? locationData.longitude : field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
|
|
||||||
// Pricing
|
|
||||||
tripContext,
|
|
||||||
nightlyPrice: priceData.nightly,
|
|
||||||
totalPrice: priceData.total,
|
|
||||||
priceStatus: priceData.status,
|
|
||||||
|
|
||||||
// Rating
|
|
||||||
rating: mergeField(
|
|
||||||
ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null,
|
|
||||||
jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null
|
|
||||||
),
|
|
||||||
reviewCount: mergeField(
|
|
||||||
ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null,
|
|
||||||
jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null
|
|
||||||
),
|
|
||||||
|
|
||||||
// Capacity
|
|
||||||
guestCount: mergeField(
|
|
||||||
capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null,
|
|
||||||
field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
officialGuestCount: mergeField(
|
|
||||||
maxGuests ? field(maxGuests, 'text_pattern', 'high') : null,
|
|
||||||
field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
bedrooms: mergeField(
|
|
||||||
capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null,
|
|
||||||
field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
beds: mergeField(
|
|
||||||
capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null,
|
|
||||||
field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
bathrooms: mergeField(
|
|
||||||
capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null,
|
|
||||||
field(null, 'derived', 'low')
|
|
||||||
),
|
|
||||||
|
|
||||||
// Sleeping
|
|
||||||
sleepingOptions,
|
|
||||||
maxSleepingPlaces: 0,
|
|
||||||
suitableFor4: false,
|
|
||||||
extraMattressesNeededFor4: 0,
|
|
||||||
sleepingDataQuality: 'UNKNOWN',
|
|
||||||
|
|
||||||
// Host
|
|
||||||
hostName: mergeField(
|
|
||||||
hostName ? field(hostName, 'text_pattern', 'high') : null,
|
|
||||||
jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null
|
|
||||||
),
|
|
||||||
|
|
||||||
// Amenities
|
|
||||||
amenities: jsonldData.amenities || [],
|
|
||||||
|
|
||||||
// Images
|
|
||||||
images: jsonldData.images || [],
|
|
||||||
coverImage: jsonldData.images?.[0] || null,
|
|
||||||
|
|
||||||
// Other
|
|
||||||
cancellationPolicy: field(null, 'derived', 'low'),
|
|
||||||
|
|
||||||
// Debug
|
|
||||||
rawSnippets: {
|
|
||||||
title: jsonldData.title || '',
|
|
||||||
visibleText: visibleText.substring(0, 2000),
|
|
||||||
},
|
|
||||||
extractionLog: [
|
|
||||||
`URL normalized: ${normalized.normalized}`,
|
|
||||||
`External ID: ${normalized.externalId}`,
|
|
||||||
`Trip context: ${JSON.stringify(tripContext)}`,
|
|
||||||
`Capacity facts: ${capacityFacts ? JSON.stringify(capacityFacts) : 'none'}`,
|
|
||||||
`Rating facts: ${ratingFacts ? JSON.stringify(ratingFacts) : 'none'}`,
|
|
||||||
`Sleeping options: ${sleepingOptions.length} found`,
|
|
||||||
],
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 6: Calculate sleeping stats
|
|
||||||
if (sleepingOptions.length > 0) {
|
|
||||||
const stats = calculateSleepingStats(sleepingOptions);
|
|
||||||
result.maxSleepingPlaces = stats.maxSleepingPlaces;
|
|
||||||
result.suitableFor4 = stats.suitableFor4;
|
|
||||||
result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
|
|
||||||
result.sleepingDataQuality = 'EXACT';
|
|
||||||
} else if (result.beds.value && result.guestCount.value) {
|
|
||||||
// Derive from beds and guest count
|
|
||||||
const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value);
|
|
||||||
const stats = calculateSleepingStats(derivedOptions);
|
|
||||||
result.sleepingOptions = derivedOptions;
|
|
||||||
result.maxSleepingPlaces = stats.maxSleepingPlaces;
|
|
||||||
result.suitableFor4 = stats.suitableFor4;
|
|
||||||
result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
|
|
||||||
result.sleepingDataQuality = 'DERIVED';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Scraping failed:", error);
|
console.error("Scraping failed:", error);
|
||||||
@ -181,36 +38,9 @@ export async function scrapeAirbnbListing(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================
|
|
||||||
// HTML Fetcher - with better error handling and logging
|
|
||||||
// ============================================
|
|
||||||
|
|
||||||
/**
 * Download the raw HTML of a page using a browser-like set of request headers.
 *
 * @param url - Absolute URL to request.
 * @returns The response body decoded as text.
 * @throws Error with message `HTTP <status> for <url>` when the response is not OK.
 */
async function fetchHtml(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
      "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
      "Accept-Encoding": "gzip, deflate, br",
      "Cache-Control": "no-cache",
      "Upgrade-Insecure-Requests": "1",
    },
  });

  if (!response.ok) {
    throw new Error(`HTTP ${response.status} for ${url}`);
  }

  const html = await response.text();

  // Log some debug info.
  // Report the size of the downloaded document; the previous version logged
  // the length of the URL string here, which made the message meaningless.
  console.log(`[Scraper] Fetched ${html.length} chars`);
  console.log(`[Scraper] Contains 'application/ld+json': ${html.includes('application/ld+json')}`);
  console.log(`[Scraper] Contains 'airbnb': ${html.toLowerCase().includes('airbnb')}`);

  return html;
}
|
|
||||||
|
|
||||||
// Re-export utilities for backward compatibility
|
// Re-export utilities for backward compatibility
|
||||||
export { normalizeAirbnbUrlWithContext as normalizeAirbnbUrl } from "./url-normalizer";
|
export { normalizeAirbnbUrlWithContext as normalizeAirbnbUrl } from "./url-normalizer";
|
||||||
export { extractAirbnbExternalId } from "./url-normalizer";
|
export { extractAirbnbExternalId } from "./url-normalizer";
|
||||||
|
|
||||||
|
// Need to import TripContext for TypeScript
|
||||||
|
import type { TripContext, ExtractedListing } from "./types";
|
||||||
|
|||||||
@ -12,6 +12,7 @@ export interface JsonLdData {
|
|||||||
cancellationPolicy: string | null;
|
cancellationPolicy: string | null;
|
||||||
hostName: string | null;
|
hostName: string | null;
|
||||||
amenities: string[];
|
amenities: string[];
|
||||||
|
price: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -31,6 +32,7 @@ export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData {
|
|||||||
cancellationPolicy: null,
|
cancellationPolicy: null,
|
||||||
hostName: null,
|
hostName: null,
|
||||||
amenities: [],
|
amenities: [],
|
||||||
|
price: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
const jsonLdScript = $('script[type="application/ld+json"]').html();
|
const jsonLdScript = $('script[type="application/ld+json"]').html();
|
||||||
@ -117,6 +119,15 @@ export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData {
|
|||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Price - extract from makesOffer.offers[0].price or offers.price
|
||||||
|
const priceValue = jsonData.makesOffer?.offers?.[0]?.price ?? jsonData.offers?.price;
|
||||||
|
if (priceValue !== undefined && priceValue !== null) {
|
||||||
|
const parsed = typeof priceValue === 'number' ? priceValue : parseFloat(String(priceValue));
|
||||||
|
if (!isNaN(parsed)) {
|
||||||
|
result.price = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to parse JSON-LD:', error);
|
console.error('Failed to parse JSON-LD:', error);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -9,11 +9,12 @@ function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number |
|
|||||||
// Try various price selectors that Airbnb might use
|
// Try various price selectors that Airbnb might use
|
||||||
const priceSelectors = [
|
const priceSelectors = [
|
||||||
'[data-testid="price-amount"]',
|
'[data-testid="price-amount"]',
|
||||||
|
'[data-testid="book-it-default"] span',
|
||||||
'span[class*="Price"]',
|
'span[class*="Price"]',
|
||||||
'span[class*="price"]',
|
'span[class*="price"]',
|
||||||
'[itemprop="price"]',
|
'[itemprop="price"]',
|
||||||
'._1y6k3r2',
|
'div[class*="bookit"] span',
|
||||||
'._1dss1omb',
|
'section[class*="booking"] span',
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const selector of priceSelectors) {
|
for (const selector of priceSelectors) {
|
||||||
@ -33,6 +34,16 @@ function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number |
|
|||||||
return priceFromHtml;
|
return priceFromHtml;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback: look for "total" near price numbers
|
||||||
|
const totalPattern = /total[^€$£]*[€$£]\s*(\d[\d.,]*)/i;
|
||||||
|
const totalMatch = html.match(totalPattern);
|
||||||
|
if (totalMatch) {
|
||||||
|
const parsed = parseFloat(totalMatch[1].replace(/[.,](?=\d{3})/g, '').replace(',', '.'));
|
||||||
|
if (!isNaN(parsed) && parsed > 0) {
|
||||||
|
return parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -2,8 +2,14 @@ import { BedType, SleepingOption } from '../types';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Bed type configuration: maps text patterns to bed types, spots per unit, and quality
|
* Bed type configuration: maps text patterns to bed types, spots per unit, and quality
|
||||||
|
*
|
||||||
|
* IMPORTANT: Longer/more specific patterns MUST come before shorter ones
|
||||||
|
* (e.g., "bunk bed" before "bed", "double bed" before "double")
|
||||||
*/
|
*/
|
||||||
export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; quality: 'FULL' | 'AUXILIARY' }> = {
|
export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; quality: 'FULL' | 'AUXILIARY' }> = {
|
||||||
|
// Compound bed types (must come first to avoid partial matches)
|
||||||
|
'bunk bed': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||||
|
'etagenbett': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||||
'double bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
'double bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||||
'doppelbett': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
'doppelbett': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||||
'queen bed': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
'queen bed': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
||||||
@ -11,11 +17,27 @@ export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; qua
|
|||||||
'single bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
'single bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
'twin bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
'twin bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
'einzelbett': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
'einzelbett': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
'bunk bed': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
|
||||||
'etagenbett': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
|
||||||
'sofa bed': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
'sofa bed': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
||||||
'pull-out sofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
'pull-out sofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
||||||
'schlafsofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
'schlafsofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
|
||||||
|
'murphy bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||||
|
'day bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
|
|
||||||
|
// Standalone bed types (without "bed" word)
|
||||||
|
'double': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||||
|
'queen': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
||||||
|
'king': { type: 'KING', spots: 2, quality: 'FULL' },
|
||||||
|
'single': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
|
'twin': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
|
|
||||||
|
// Plural forms
|
||||||
|
'doubles': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
|
||||||
|
'singles': { type: 'SINGLE', spots: 1, quality: 'FULL' },
|
||||||
|
'queens': { type: 'QUEEN', spots: 2, quality: 'FULL' },
|
||||||
|
'kings': { type: 'KING', spots: 2, quality: 'FULL' },
|
||||||
|
'bunks': { type: 'BUNK', spots: 2, quality: 'FULL' },
|
||||||
|
|
||||||
|
// Auxiliary sleeping
|
||||||
'couch': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
|
'couch': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
|
||||||
'sofa': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
|
'sofa': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
|
||||||
'air mattress': { type: 'AIR_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
'air mattress': { type: 'AIR_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
||||||
@ -24,9 +46,11 @@ export const BED_TYPE_CONFIG: Record<string, { type: BedType; spots: number; qua
|
|||||||
'extra mattress': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
'extra mattress': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
||||||
'zusatzmatratze': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
'zusatzmatratze': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
|
||||||
'futon': { type: 'FUTON', spots: 1, quality: 'AUXILIARY' },
|
'futon': { type: 'FUTON', spots: 1, quality: 'AUXILIARY' },
|
||||||
|
'hammock': { type: 'SINGLE', spots: 1, quality: 'AUXILIARY' },
|
||||||
};
|
};
|
||||||
|
|
||||||
// Pattern: "1 double bed" or "2 single beds" or "Bedroom 1: 1 queen bed"
|
// Pattern: "1 double bed" or "2 single beds" or "Bedroom 1: 1 queen bed"
|
||||||
|
// Note: lastIndex is reset before each use to avoid global flag bug
|
||||||
const BED_PATTERN = /(?:(?:bedroom|schlafzimmer|room|zimmer)\s*\d*\s*:?\s*)?(\d+)\s+([a-z\s-]+?)(?:\s|$|,|\.)/gi;
|
const BED_PATTERN = /(?:(?:bedroom|schlafzimmer|room|zimmer)\s*\d*\s*:?\s*)?(\d+)\s+([a-z\s-]+?)(?:\s|$|,|\.)/gi;
|
||||||
|
|
||||||
export interface SleepingStats {
|
export interface SleepingStats {
|
||||||
@ -47,6 +71,9 @@ export function parseSleepingArrangements(text: string): SleepingOption[] {
|
|||||||
const options: SleepingOption[] = [];
|
const options: SleepingOption[] = [];
|
||||||
const lowerText = text.toLowerCase();
|
const lowerText = text.toLowerCase();
|
||||||
|
|
||||||
|
// Reset lastIndex to avoid bug with global flag + exec() loop
|
||||||
|
BED_PATTERN.lastIndex = 0;
|
||||||
|
|
||||||
let match;
|
let match;
|
||||||
while ((match = BED_PATTERN.exec(lowerText)) !== null) {
|
while ((match = BED_PATTERN.exec(lowerText)) !== null) {
|
||||||
const quantity = parseInt(match[1], 10);
|
const quantity = parseInt(match[1], 10);
|
||||||
@ -114,30 +141,78 @@ export function calculateSleepingStats(options: SleepingOption[]): SleepingStats
|
|||||||
/**
 * Derive sleeping options from the bed count (fallback with low confidence).
 * Used when detailed sleeping arrangement text is not available.
 *
 * Logic:
 * - beds falsy or < 1                      → no options
 * - beds === 1 && guestCount >= 2          → one double
 * - beds === 1 otherwise                   → one single
 * - beds >= 2 && guestCount >= beds * 1.5  → mix of double/single: at least half of
 *   the beds are doubles, and more of them when the guest count requires it
 *   (never more doubles than beds)
 * - beds >= 2 && guestCount < beds * 1.5   → all single
 * - beds >= 2 && guestCount falsy          → ratio is treated as 2, giving the
 *   half-double / half-single split
 *
 * @param beds - Number of beds reported for the listing.
 * @param guestCount - Number of guests reported for the listing.
 * @returns Derived sleeping options; labels are marked "(abgeleitet)".
 */
export function deriveSleepingFromBeds(beds: number, guestCount: number): SleepingOption[] {
  if (!beds || beds < 1) return [];

  const options: SleepingOption[] = [];

  if (beds === 1) {
    if (guestCount >= 2) {
      // 1 bed for 2+ guests → must be double
      options.push({
        bedType: 'DOUBLE',
        quantity: 1,
        spotsPerUnit: 2,
        quality: 'FULL',
        label: 'Doppelbett (abgeleitet)',
      });
    } else {
      // 1 bed for 1 guest → single
      options.push({
        bedType: 'SINGLE',
        quantity: 1,
        spotsPerUnit: 1,
        quality: 'FULL',
        label: 'Einzelbett (abgeleitet)',
      });
    }
  } else if (beds >= 2) {
    const avgGuestsPerBed = guestCount ? guestCount / beds : 2;

    if (avgGuestsPerBed >= 1.5) {
      // High guest-to-bed ratio → mix of double and single.
      // Baseline: roughly half of the beds are doubles.
      const baselineDoubles = Math.ceil(beds / 2);
      // Every bed sleeps at least one person, so each guest beyond the bed
      // count needs one bed to be a double. Without this, 2 beds / 4 guests
      // would derive only 3 sleeping places.
      const doublesForGuests = guestCount ? Math.ceil(guestCount - beds) : 0;
      const doubleCount = Math.min(beds, Math.max(baselineDoubles, doublesForGuests));
      const singleCount = beds - doubleCount;

      if (doubleCount > 0) {
        options.push({
          bedType: 'DOUBLE',
          quantity: doubleCount,
          spotsPerUnit: 2,
          quality: 'FULL',
          label: 'Doppelbett (abgeleitet)',
        });
      }
      if (singleCount > 0) {
        options.push({
          bedType: 'SINGLE',
          quantity: singleCount,
          spotsPerUnit: 1,
          quality: 'FULL',
          label: 'Einzelbett (abgeleitet)',
        });
      }
    } else {
      // Low guest-to-bed ratio → single beds only.
      // NOTE(review): this can still yield fewer places than guestCount
      // (e.g. 3 beds / 4 guests → 3 places); left unchanged on purpose.
      options.push({
        bedType: 'SINGLE',
        quantity: beds,
        spotsPerUnit: 1,
        quality: 'FULL',
        label: 'Einzelbett (abgeleitet)',
      });
    }
  }

  return options;
}
|
||||||
|
|||||||
419
src/lib/airbnb/puppeteer-scraper.ts
Normal file
419
src/lib/airbnb/puppeteer-scraper.ts
Normal file
@ -0,0 +1,419 @@
|
|||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
import type { Browser, Page } from 'puppeteer';
|
||||||
|
import * as cheerio from 'cheerio';
|
||||||
|
import {
|
||||||
|
ExtractedListing,
|
||||||
|
FieldSource,
|
||||||
|
field,
|
||||||
|
mergeField,
|
||||||
|
TripContext,
|
||||||
|
PriceStatus,
|
||||||
|
SleepingDataQuality
|
||||||
|
} from './types';
|
||||||
|
import { parseJsonLd } from './parsers/jsonld';
|
||||||
|
import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText, parseTitle } from './parsers/text-patterns';
|
||||||
|
import { extractLocation } from './parsers/location';
|
||||||
|
import { extractPrice } from './parsers/price';
|
||||||
|
import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds, BED_TYPE_CONFIG } from './parsers/sleeping';
|
||||||
|
|
||||||
|
// Enable stealth mode
|
||||||
|
import Stealth from 'puppeteer-extra-plugin-stealth';
|
||||||
|
// Registers the stealth plugin on the shared puppeteer-extra instance at module load,
// so it is in place before scrapeAirbnbWithPuppeteer() launches a browser.
// NOTE(review): `Stealth` is a second default import of 'puppeteer-extra-plugin-stealth',
// which is already imported as `StealthPlugin` at the top of this file; one of the two is redundant.
puppeteer.use(Stealth());
|
||||||
|
|
||||||
|
/**
 * Main Puppeteer-based scraper that actually renders JavaScript.
 *
 * Launches a headless browser, loads the listing, and runs the JSON-LD,
 * text-pattern, price, location and sleeping parsers over the rendered page.
 *
 * @param url - Listing URL to open; also stored as `originalUrl` / `normalizedUrl` on the result.
 * @param options - Optional trip context (check-in, check-out, adults; adults defaults to 4).
 * @returns The extracted listing, or `null` when the page title looks like a 404
 *          page or any step throws (the error is logged, not rethrown).
 */
export async function scrapeAirbnbWithPuppeteer(
  url: string,
  options?: { tripContext?: TripContext }
): Promise<ExtractedListing | null> {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  let browser: Browser | null = null;

  try {
    // Launch browser with stealth mode
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
      ],
    });

    const page: Page = await browser.newPage();

    // Set realistic viewport and user agent
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    // Navigate and wait for network idle
    console.log(`[Puppeteer] Navigating to ${url}`);
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });

    // Wait a bit more for dynamic content
    await new Promise(resolve => setTimeout(resolve, 2000));

    // Check if we got a 404 or challenge page
    const pageTitle = await page.title();
    if (pageTitle.includes('404') || pageTitle.includes('Not Found')) {
      console.error('[Puppeteer] Got 404 page');
      return null;
    }

    console.log(`[Puppeteer] Page title: ${pageTitle}`);

    // Get rendered HTML
    const html = await page.content();
    const $ = cheerio.load(html);

    // Extract visible text for pattern matching
    const visibleText = extractVisibleText(html);
    console.log(`[Puppeteer] Extracted ${visibleText.length} chars of visible text`);

    // Run all parsers
    const jsonldData = parseJsonLd($);
    console.log(`[Puppeteer] JSON-LD: title=${!!jsonldData.title}, images=${jsonldData.images.length}`);

    const capacityFacts = parseCapacityFacts(visibleText);
    console.log(`[Puppeteer] Capacity: ${JSON.stringify(capacityFacts)}`);

    const ratingFacts = parseRating(visibleText);
    const hostName = parseHost(visibleText);
    const maxGuests = parseMaxGuests(visibleText);

    // Try to get sleeping arrangements from the rendered page
    const sleepingOptions = await parseSleepingArrangementsFromPage(page);
    console.log(`[Puppeteer] Sleeping options: ${sleepingOptions.length} found`);

    const tripContext: TripContext = {
      checkIn: options?.tripContext?.checkIn || undefined,
      checkOut: options?.tripContext?.checkOut || undefined,
      adults: options?.tripContext?.adults || 4,
    };

    const priceData = extractPrice(html, $, tripContext);

    // Use JSON-LD price as fallback if price extraction failed
    const jsonLdPrice = jsonldData.price;
    if (jsonLdPrice !== null && priceData.nightly.value === null) {
      priceData.nightly = { value: jsonLdPrice, source: 'jsonld', confidence: 'medium' };
      priceData.status = 'EXTRACTED';

      // Calculate total if trip context available
      if (tripContext.checkIn && tripContext.checkOut) {
        // `new Date(...)` does not throw on unparseable input; it yields an
        // invalid date whose getTime() is NaN, so validity is checked explicitly.
        const checkInMs = new Date(tripContext.checkIn).getTime();
        const checkOutMs = new Date(tripContext.checkOut).getTime();
        if (Number.isFinite(checkInMs) && Number.isFinite(checkOutMs)) {
          const nights = Math.round((checkOutMs - checkInMs) / MS_PER_DAY);
          if (nights > 0) {
            priceData.total = { value: jsonLdPrice * nights, source: 'derived', confidence: 'medium' };
          }
        }
      }
    }

    const locationData = extractLocation($, html);
    const pageTitleParsed = parseTitle(html);

    // Extract images from the rendered page first (so the cover image stays
    // the one found in the DOM), then append JSON-LD images that the DOM pass
    // did not find. Previously the JSON-LD images were parsed but never used.
    const domImages = extractImagesFromPage($);
    const images = [...new Set([...domImages, ...jsonldData.images])];
    console.log(`[Puppeteer] Found ${images.length} images`);

    // Extract description from rendered page
    const description = extractDescriptionFromPage($);

    // Extract amenities if not in JSON-LD
    const amenities = jsonldData.amenities.length > 0
      ? jsonldData.amenities
      : extractAmenitiesFromPage($);
    console.log(`[Puppeteer] Found ${amenities.length} amenities`);

    // Build the result
    const result: ExtractedListing = {
      originalUrl: url,
      normalizedUrl: url,
      externalId: extractExternalId(url),

      // Title - try multiple sources
      title: mergeField(
        jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null,
        pageTitleParsed ? field(pageTitleParsed, 'text_pattern', 'medium') : null
      ),

      description: mergeField(
        jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null,
        description ? field(description, 'dom', 'medium') : null
      ),

      // Location
      locationText: locationData.locationText.value
        ? field(locationData.locationText.value, locationData.locationText.source, locationData.locationText.confidence)
        : field(null, 'derived', 'low'),
      latitude: locationData.latitude,
      longitude: locationData.longitude,

      // Pricing
      tripContext,
      nightlyPrice: priceData.nightly,
      totalPrice: priceData.total,
      priceStatus: priceData.status,

      // Rating
      rating: mergeField(
        ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null,
        jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null
      ),
      reviewCount: mergeField(
        ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null,
        jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null
      ),

      // Capacity
      guestCount: mergeField(
        capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null,
        maxGuests ? field(maxGuests, 'text_pattern', 'medium') : null
      ),
      officialGuestCount: mergeField(
        maxGuests ? field(maxGuests, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),
      bedrooms: mergeField(
        capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),
      beds: mergeField(
        capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),
      bathrooms: mergeField(
        capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null,
        field(null, 'derived', 'low')
      ),

      // Sleeping (stats are filled in below)
      sleepingOptions,
      maxSleepingPlaces: 0,
      suitableFor4: false,
      extraMattressesNeededFor4: 0,
      sleepingDataQuality: 'UNKNOWN',

      // Host
      hostName: mergeField(
        hostName ? field(hostName, 'text_pattern', 'high') : null,
        jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null
      ),

      // Amenities
      amenities,

      // Images
      images,
      coverImage: images[0] || null,

      // Other
      cancellationPolicy: jsonldData.cancellationPolicy
        ? field(jsonldData.cancellationPolicy, 'jsonld', 'high')
        : field(null, 'derived', 'low'),

      // Debug
      rawSnippets: {
        title: jsonldData.title || pageTitleParsed || '',
        visibleText: visibleText.substring(0, 2000),
      },
      extractionLog: [
        `Puppeteer render: ${url}`,
        `Page title: ${pageTitle}`,
        `Images found: ${images.length}`,
        `Amenities found: ${amenities.length}`,
        `Capacity: ${JSON.stringify(capacityFacts)}`,
      ],
    };

    // Calculate sleeping stats: prefer options parsed from the page, otherwise
    // derive them from the bed and guest counts when both are known.
    if (sleepingOptions.length > 0) {
      const stats = calculateSleepingStats(sleepingOptions);
      result.maxSleepingPlaces = stats.maxSleepingPlaces;
      result.suitableFor4 = stats.suitableFor4;
      result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
      result.sleepingDataQuality = 'EXACT';
    } else if (result.beds.value && result.guestCount.value) {
      const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value);
      const stats = calculateSleepingStats(derivedOptions);
      result.sleepingOptions = derivedOptions;
      result.maxSleepingPlaces = stats.maxSleepingPlaces;
      result.suitableFor4 = stats.suitableFor4;
      result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
      result.sleepingDataQuality = 'DERIVED';
    }

    return result;

  } catch (error) {
    console.error('[Puppeteer] Scraper error:', error);
    return null;
  } finally {
    if (browser) {
      // A failure while shutting the browser down must not replace an already
      // computed result (or the null above) with a rejected promise.
      try {
        await browser.close();
      } catch (closeError) {
        console.error('[Puppeteer] Failed to close browser:', closeError);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Extract the external listing ID from a listing URL.
 *
 * Searches the string for a `/rooms/<digits>` path segment.
 *
 * @param url - Listing URL, e.g. `https://www.airbnb.com/rooms/52367822`.
 * @returns The digits that follow `/rooms/`, or `null` when the URL has no
 *   such segment.
 */
function extractExternalId(url: string): string | null {
  const roomPath = /\/rooms\/(\d+)/.exec(url);
  if (roomPath === null) {
    return null;
  }
  return roomPath[1] || null;
}
|
||||||
|
|
||||||
|
/**
 * Collect image URLs from the rendered page.
 *
 * The `og:image` meta content (when present) comes first, followed by the
 * sources of `<img>` elements inside photo/image containers, in document
 * order and without duplicates. JSON-LD images are handled separately.
 *
 * @param $ - Cheerio handle for the rendered HTML.
 * @returns Ordered list of unique image URLs; empty when nothing matched.
 */
function extractImagesFromPage($: cheerio.CheerioAPI): string[] {
  // A Set keeps insertion order, so it doubles as the de-duplicated result list.
  const collected = new Set<string>();

  const ogImage = $('meta[property="og:image"]').attr('content');
  if (ogImage) {
    collected.add(ogImage);
  }

  const gallerySelector =
    '[data-testid*="photo"] img, [data-testid*="image"] img, [class*="photo"] img';

  $(gallerySelector).each((_, el) => {
    const img = $(el);
    // Lazy-loaded images may carry their URL in a data attribute instead of src.
    const src = img.attr('src') || img.attr('data-src') || img.attr('data-image');
    // Only values starting with "http" are accepted for gallery images.
    if (src && src.startsWith('http')) {
      collected.add(src);
    }
  });

  return [...collected];
}
|
||||||
|
|
||||||
|
/**
 * Extract the listing description from the rendered page.
 *
 * Candidate selectors are tried in order; the first one whose trimmed text
 * is longer than 20 characters wins.
 *
 * @param $ - Cheerio handle for the rendered HTML.
 * @returns The first 500 characters of the matched text, or `null` when no
 *   selector yields text longer than 20 characters.
 */
function extractDescriptionFromPage($: cheerio.CheerioAPI): string | null {
  const candidateSelectors = [
    '[data-section-id="DESCRIPTION_DEFAULT"]',
    '#description',
    '.description',
    '[itemprop="description"]',
  ];

  for (const candidate of candidateSelectors) {
    const content = $(candidate).text().trim();
    // Very short matches are treated as noise and skipped.
    if (content.length <= 20) {
      continue;
    }
    return content.slice(0, 500);
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Extract amenity labels from the rendered page.
 *
 * Takes the trimmed text of every element whose `data-testid` contains
 * `amenity`, skipping empty strings and duplicates.
 *
 * @param $ - Cheerio handle for the rendered HTML.
 * @returns Unique amenity labels in document order.
 */
function extractAmenitiesFromPage($: cheerio.CheerioAPI): string[] {
  // A Set keeps first-seen order while dropping repeated labels.
  const labels = new Set<string>();

  $('[data-testid*="amenity"]').each((_, el) => {
    const label = $(el).text().trim();
    if (label) {
      labels.add(label);
    }
  });

  return Array.from(labels);
}
|
||||||
|
|
||||||
|
/**
 * Lookup from bed type to spots per unit, built once from BED_TYPE_CONFIG.
 *
 * When several config entries share the same `type`, the first entry
 * encountered determines the value.
 */
const BED_TYPE_SPOTS_MAP: Record<string, number> = Object.values(BED_TYPE_CONFIG).reduce<
  Record<string, number>
>((spotsByType, entry) => {
  if (entry.type in spotsByType) {
    return spotsByType;
  }
  spotsByType[entry.type] = entry.spots;
  return spotsByType;
}, {});
|
||||||
|
|
||||||
|
/**
 * Try to parse sleeping arrangements from the Puppeteer page.
 *
 * Reads the text of the `SLEEPING_CONFIGURATION` section and scans it for
 * "<count> [×|x] <bed keyword>" phrases. Each hit becomes one sleeping
 * option whose `spotsPerUnit` is looked up in BED_TYPE_SPOTS_MAP (falling
 * back to 2 when the type has no entry).
 *
 * This is best-effort: any error is logged and whatever was collected up to
 * that point is returned.
 *
 * @param page - Puppeteer page to inspect.
 * @returns One entry per matched phrase; empty when the section is missing
 *   or nothing matched.
 */
async function parseSleepingArrangementsFromPage(page: Page): Promise<ExtractedListing['sleepingOptions']> {
  const options: ExtractedListing['sleepingOptions'] = [];

  try {
    // Try to find sleeping/bedroom section
    const sleepingSection = await page.$('[data-section-id="SLEEPING_CONFIGURATION"]');
    if (!sleepingSection) {
      return options;
    }

    // textContent can be null; normalise so the regex scan always gets a string.
    const text = (await sleepingSection.evaluate(el => el.textContent)) ?? '';

    // Every pattern captures the count in group 1 and the bed keyword in
    // group 2. Without the second group, futon/mattress/couch matches lose
    // their keyword and are misclassified as a full double bed.
    const bedPatterns = [
      /(\d+)\s*(?:×|x)?\s*(queen|king|single|double|twin|full|king-size|queen-size)\s*bed/gi,
      /(\d+)\s*(?:×|x)?\s*(Futon)/gi,
      /(\d+)\s*(?:×|x)?\s*(Matratze)/gi,
      /(\d+)\s*(?:×|x)?\s*(Couch)/gi,
    ];

    for (const pattern of bedPatterns) {
      let match;
      while ((match = pattern.exec(text)) !== null) {
        const quantity = parseInt(match[1], 10);
        const bedType = match[2] || 'bed';

        // Map German/English bed types to BedType enum
        let normalizedType: import('./types').BedType;
        let quality: 'FULL' | 'AUXILIARY';

        const lower = bedType.toLowerCase();
        if (lower.includes('queen')) {
          normalizedType = 'QUEEN';
          quality = 'FULL';
        } else if (lower.includes('king')) {
          normalizedType = 'KING';
          quality = 'FULL';
        } else if (lower.includes('double') || lower.includes('full')) {
          normalizedType = 'DOUBLE';
          quality = 'FULL';
        } else if (lower.includes('twin') || lower.includes('single')) {
          normalizedType = 'SINGLE';
          quality = 'FULL';
        } else if (lower.includes('futon')) {
          normalizedType = 'FUTON';
          quality = 'AUXILIARY';
        } else if (lower.includes('matratze') || lower.includes('mattress')) {
          normalizedType = 'EXTRA_MATTRESS';
          quality = 'AUXILIARY';
        } else if (lower.includes('couch') || lower.includes('sofa')) {
          normalizedType = 'SOFA';
          quality = 'AUXILIARY';
        } else {
          // Unrecognised keyword: keep the previous default of a full double bed.
          normalizedType = 'DOUBLE';
          quality = 'FULL';
        }

        options.push({
          bedType: normalizedType,
          quantity,
          spotsPerUnit: BED_TYPE_SPOTS_MAP[normalizedType] ?? 2,
          quality,
        });
      }
    }
  } catch (error) {
    console.error('[Puppeteer] Error parsing sleeping arrangements:', error);
  }

  return options;
}
|
||||||
96
test-scraper-debug.ts
Normal file
96
test-scraper-debug.ts
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
/**
|
||||||
|
* Debug test - captures more info about what's happening
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
|
||||||
|
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
const TEST_URL = 'https://www.airbnb.com/rooms/842937876795894279';
|
||||||
|
|
||||||
|
/**
 * Load TEST_URL in a headless browser and print diagnostics: redirects,
 * response status, final URL, page title, HTML/body-text sizes, hints of a
 * challenge or homepage redirect, and whether key listing sections exist.
 * A viewport screenshot is written to `debug-screenshot.png`.
 *
 * Errors raised after the browser has launched are logged, and the browser
 * is always closed afterwards.
 */
async function main() {
  console.log('Starting debug test...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      '--window-size=1920,1080',
    ],
  });

  // Page setup lives inside the try so that a failure there still reaches
  // the finally block and the browser process is closed.
  try {
    const page = await browser.newPage();

    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    console.log(`Navigating to: ${TEST_URL}`);

    // Track redirects
    page.on('response', (response) => {
      const status = response.status();
      if (status >= 300 && status < 400) {
        console.log(`🔄 Redirect: ${status} → ${response.headers()['location']?.substring(0, 100)}`);
      }
    });

    const response = await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    console.log(`\n📊 Response status: ${response?.status()}`);
    console.log(`📊 Final URL: ${page.url()}`);
    console.log(`📊 Page title: ${await page.title()}`);

    // Wait longer for dynamic content
    console.log('\n⏳ Waiting 5 seconds for dynamic content...');
    await new Promise(r => setTimeout(r, 5000));

    // Get page content
    const html = await page.content();
    console.log(`\n📄 HTML length: ${html.length} chars`);

    // Check for challenge page
    if (html.includes('challenge') || html.includes('captcha') || html.includes('blocked')) {
      console.log('⚠️ Possible challenge/blocked page detected!');
    }

    // Check if we're on the homepage
    if (page.url() === 'https://www.airbnb.com/' || page.url() === 'https://www.airbnb.com') {
      console.log('⚠️ Redirected to homepage - likely blocked!');
    }

    // Extract visible text
    const bodyText = await page.evaluate(() => document.body.innerText);
    console.log(`\n📝 Body text length: ${bodyText.length} chars`);
    console.log(`\n📝 First 500 chars of visible text:\n${bodyText.substring(0, 500)}`);

    // Check for specific listing elements
    const hasListingTitle = await page.$('[data-plugin-in-point-id="TITLE_DEFAULT"]');
    const hasPhotos = await page.$('[data-section-id="PHOTO_PICKER"]');
    const hasPrice = await page.$('[data-plugin-in-point-id="PRICE_DEFAULT"]');

    console.log(`\n🔍 Listing elements found:`);
    console.log(` Title section: ${hasListingTitle ? '✅' : '❌'}`);
    console.log(` Photos section: ${hasPhotos ? '✅' : '❌'}`);
    console.log(` Price section: ${hasPrice ? '✅' : '❌'}`);

    // Take a screenshot
    await page.screenshot({ path: 'debug-screenshot.png', fullPage: false });
    console.log(`\n📸 Screenshot saved to: debug-screenshot.png`);
  } catch (error) {
    console.error('Error:', error);
  } finally {
    await browser.close();
  }
}

// A launch failure rejects main(); report it and flag the run as failed
// instead of leaving a floating promise.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||||
127
test-scraper.ts
Normal file
127
test-scraper.ts
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
/**
|
||||||
|
* Test script for Puppeteer-based Airbnb scraper
|
||||||
|
* Run with: npx tsx test-scraper.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { scrapeAirbnbWithPuppeteer } from './src/lib/airbnb/puppeteer-scraper';
|
||||||
|
|
||||||
|
const TEST_URL = 'https://www.airbnb.com/rooms/52367822'; // Valid listing in Bad Bellingen, Germany
|
||||||
|
|
||||||
|
/**
 * Run the Puppeteer scraper against TEST_URL and print a human-readable
 * report of every extracted field, followed by the extraction log.
 *
 * A `null` scraper result is reported and ends the run early; a thrown
 * error is caught and printed together with the elapsed time.
 */
async function main() {
  console.log('========================================');
  console.log('Airbnb Puppeteer Scraper Test');
  console.log('========================================\n');

  console.log(`Testing URL: ${TEST_URL}\n`);
  console.log('Starting scraper (this may take 30-60 seconds)...\n');

  const startTime = Date.now();

  try {
    const result = await scrapeAirbnbWithPuppeteer(TEST_URL);

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`\n✅ Scraping completed in ${elapsed}s\n`);

    if (!result) {
      console.log('❌ Result is null - scraping may have failed');
      return;
    }

    console.log('========================================');
    console.log('EXTRACTED DATA');
    console.log('========================================\n');

    // Title
    console.log('📌 TITLE:');
    console.log(` Value: ${result.title?.value || 'N/A'}`);
    console.log(` Source: ${result.title?.source || 'N/A'}`);
    console.log(` Confidence: ${result.title?.confidence || 'N/A'}\n`);

    // Price
    // Numeric fields use ?? so that a legitimate 0 is printed instead of N/A.
    console.log('💰 PRICE:');
    console.log(` Nightly: ${result.nightlyPrice?.value ?? 'N/A'} EUR`);
    console.log(` Total: ${result.totalPrice?.value ?? 'N/A'} EUR`);
    console.log(` Status: ${result.priceStatus || 'N/A'}\n`);

    // Location
    console.log('📍 LOCATION:');
    console.log(` Text: ${result.locationText?.value || 'N/A'}`);
    console.log(` Lat/Lng: ${result.latitude}, ${result.longitude}\n`);

    // Rating
    console.log('⭐ RATING:');
    console.log(` Rating: ${result.rating?.value ?? 'N/A'}`);
    console.log(` Reviews: ${result.reviewCount?.value ?? 'N/A'}\n`);

    // Capacity
    console.log('🏠 CAPACITY:');
    console.log(` Guests: ${result.guestCount?.value ?? 'N/A'}`);
    console.log(` Bedrooms: ${result.bedrooms?.value ?? 'N/A'}`);
    console.log(` Beds: ${result.beds?.value ?? 'N/A'}`);
    console.log(` Bathrooms: ${result.bathrooms?.value ?? 'N/A'}\n`);

    // Sleeping Options
    console.log('🛏️ SLEEPING OPTIONS:');
    if (result.sleepingOptions && result.sleepingOptions.length > 0) {
      result.sleepingOptions.forEach((opt, i) => {
        console.log(` ${i + 1}. ${opt.quantity}x ${opt.bedType} (${opt.spotsPerUnit} spots, ${opt.quality})`);
      });
      console.log(` Max sleeping places: ${result.maxSleepingPlaces}`);
      console.log(` Suitable for 4: ${result.suitableFor4 ? '✅ Yes' : '❌ No'}`);
      console.log(` Quality: ${result.sleepingDataQuality}`);
    } else {
      console.log(' No sleeping options extracted');
    }
    console.log('');

    // Host
    console.log('👤 HOST:');
    console.log(` Name: ${result.hostName?.value || 'N/A'}\n`);

    // Images
    console.log('🖼️ IMAGES:');
    console.log(` Count: ${result.images?.length ?? 0}`);
    if (result.images && result.images.length > 0) {
      console.log(` First 3:`);
      result.images.slice(0, 3).forEach((img, i) => {
        console.log(` ${i + 1}. ${img.substring(0, 80)}...`);
      });
    }
    console.log('');

    // Amenities
    console.log('✨ AMENITIES:');
    console.log(` Count: ${result.amenities?.length ?? 0}`);
    if (result.amenities && result.amenities.length > 0) {
      console.log(` First 10: ${result.amenities.slice(0, 10).join(', ')}`);
    }
    console.log('');

    // Description
    console.log('📝 DESCRIPTION:');
    const desc = result.description?.value || 'N/A';
    console.log(` ${desc.substring(0, 200)}${desc.length > 200 ? '...' : ''}\n`);

    // External ID
    console.log('🔗 EXTERNAL ID:');
    console.log(` ${result.externalId || 'N/A'}\n`);

    // Extraction Log
    console.log('📋 EXTRACTION LOG:');
    result.extractionLog?.forEach(log => {
      console.log(` - ${log}`);
    });

    console.log('\n========================================');
    console.log('TEST COMPLETE');
    console.log('========================================');
  } catch (error) {
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`\n❌ Error after ${elapsed}s:`);
    console.error(error);
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
Loading…
x
Reference in New Issue
Block a user