diff --git a/prisma/dev.db b/prisma/dev.db
new file mode 100644
index 0000000..e69de29
diff --git a/prisma/prisma/dev.db b/prisma/prisma/dev.db
index e5b7a6f..ae87d77 100644
Binary files a/prisma/prisma/dev.db and b/prisma/prisma/dev.db differ
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 425fad9..dd081c1 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -31,6 +31,7 @@ model Listing {
nightlyPrice Float? @map("nightly_price")
totalPrice Float? @map("total_price")
currency String? @default("EUR")
+ priceStatus String? @map("price_status") // EXTRACTED, REQUIRES_TRIP_CONTEXT, UNKNOWN, PARTIAL
// Rating
rating Float?
diff --git a/src/actions/import-listing.ts b/src/actions/import-listing.ts
index 76e8245..b94fee3 100644
--- a/src/actions/import-listing.ts
+++ b/src/actions/import-listing.ts
@@ -2,17 +2,29 @@
import { z } from "zod";
import { prisma } from "@/lib/prisma";
-import { scrapeAirbnbListing, extractAirbnbExternalId, normalizeAirbnbUrl } from "@/lib/airbnb-scraper";
+import { scrapeAirbnbListing } from "@/lib/airbnb";
+import { normalizeAirbnbUrl, extractAirbnbExternalId } from "@/lib/airbnb/url-normalizer";
import { slugify } from "@/lib/utils";
import { revalidatePath } from "next/cache";
const schema = z.object({
airbnbUrl: z.string().url("Ungültige URL"),
+ checkIn: z.string().optional(),
+ checkOut: z.string().optional(),
+ adults: z.number().optional(),
});
export async function importListingAction(formData: FormData) {
+ const url = formData.get("airbnbUrl") as string;
+ const checkIn = formData.get("checkIn") as string | null;
+ const checkOut = formData.get("checkOut") as string | null;
+ const adultsStr = formData.get("adults") as string | null;
+
const parsed = schema.safeParse({
- airbnbUrl: formData.get("airbnbUrl"),
+ airbnbUrl: url,
+ checkIn: checkIn || undefined,
+ checkOut: checkOut || undefined,
+ adults: adultsStr ? parseInt(adultsStr, 10) : undefined,
});
if (!parsed.success) {
@@ -22,6 +34,7 @@ export async function importListingAction(formData: FormData) {
const normalizedUrl = normalizeAirbnbUrl(parsed.data.airbnbUrl);
const externalId = extractAirbnbExternalId(normalizedUrl);
+ // Check for duplicates
const duplicate = await prisma.listing.findFirst({
where: {
OR: [
@@ -42,10 +55,31 @@ export async function importListingAction(formData: FormData) {
};
}
- const scrapedData = await scrapeAirbnbListing(parsed.data.airbnbUrl);
- const title = scrapedData?.title || "Neues Airbnb";
+ // Build trip context from form or URL
+ const tripContext = {
+ checkIn: parsed.data.checkIn,
+ checkOut: parsed.data.checkOut,
+ adults: parsed.data.adults || 4,
+ };
+
+ // Scrape with trip context for better price extraction
+ const scrapedData = await scrapeAirbnbListing(parsed.data.airbnbUrl, { tripContext });
+
+ const title = scrapedData?.title?.value || "Neues Airbnb";
const slug = `${slugify(title)}-${Date.now()}`;
+ // Calculate sleeping stats
+ let maxSleepingPlaces = scrapedData?.maxSleepingPlaces || null;
+ let suitableFor4 = scrapedData?.suitableFor4 || null;
+ let extraMattressesNeededFor4 = scrapedData?.extraMattressesNeededFor4 || null;
+ let bedTypesSummary = null;
+
+ if (scrapedData?.sleepingOptions && scrapedData.sleepingOptions.length > 0) {
+ const types = scrapedData.sleepingOptions.map(o => `${o.quantity}× ${o.bedType}`);
+ bedTypesSummary = types.join(", ");
+ }
+
+ // Create listing
const listing = await prisma.listing.create({
data: {
title,
@@ -53,29 +87,54 @@ export async function importListingAction(formData: FormData) {
airbnbUrl: parsed.data.airbnbUrl,
normalizedUrl,
externalId,
- ...(scrapedData?.pricePerNight && { nightlyPrice: scrapedData.pricePerNight }),
- ...(scrapedData?.rating && { rating: scrapedData.rating }),
- ...(scrapedData?.reviewCount && { reviewCount: scrapedData.reviewCount }),
- ...(scrapedData?.guestCount && { guestCount: scrapedData.guestCount }),
- ...(scrapedData?.bedrooms && { bedrooms: scrapedData.bedrooms }),
- ...(scrapedData?.beds && { beds: scrapedData.beds }),
- ...(scrapedData?.bathrooms && { bathrooms: scrapedData.bathrooms }),
- ...(scrapedData?.description && { description: scrapedData.description }),
- ...(scrapedData?.hostName && { hostName: scrapedData.hostName }),
- ...(scrapedData?.location && { locationText: scrapedData.location }),
- ...(scrapedData?.latitude && { latitude: scrapedData.latitude }),
- ...(scrapedData?.longitude && { longitude: scrapedData.longitude }),
- ...(scrapedData?.cancellationPolicy && { cancellationPolicy: scrapedData.cancellationPolicy }),
- ...(scrapedData?.images?.length && { coverImage: scrapedData.images[0] }),
- ...(scrapedData?.amenities?.length && { amenities: JSON.stringify(scrapedData.amenities) }),
+
+ // Location
+ locationText: scrapedData?.locationText?.value || null,
+ latitude: scrapedData?.latitude?.value || null,
+ longitude: scrapedData?.longitude?.value || null,
+
+ // Pricing
+ nightlyPrice: scrapedData?.nightlyPrice?.value || null,
+ totalPrice: scrapedData?.totalPrice?.value || null,
+ currency: "EUR",
+ priceStatus: scrapedData?.priceStatus || "UNKNOWN",
+
+ // Rating
+ rating: scrapedData?.rating?.value || null,
+ reviewCount: scrapedData?.reviewCount?.value || null,
+
+ // Capacity
+ guestCount: scrapedData?.guestCount?.value || null,
+ officialGuestCount: scrapedData?.officialGuestCount?.value || null,
+ maxSleepingPlaces,
+ suitableFor4,
+ extraMattressesNeededFor4,
+ bedTypesSummary,
+
+ // Room Details
+ bedrooms: scrapedData?.bedrooms?.value || null,
+ beds: scrapedData?.beds?.value || null,
+ bathrooms: scrapedData?.bathrooms?.value || null,
+
+ // Description & Host
+ description: scrapedData?.description?.value || null,
+ hostName: scrapedData?.hostName?.value || null,
+ cancellationPolicy: scrapedData?.cancellationPolicy?.value || null,
+
+ // Images
+ coverImage: scrapedData?.coverImage || null,
+ amenities: scrapedData?.amenities?.length ? JSON.stringify(scrapedData.amenities) : null,
+
+ // Raw data for debugging
rawSourceData: scrapedData ? JSON.stringify(scrapedData) : null,
},
select: { id: true, slug: true },
});
+ // Save images
if (scrapedData?.images?.length) {
await prisma.listingImage.createMany({
- data: scrapedData.images.map((url, index) => ({
+ data: scrapedData.images.slice(0, 20).map((url, index) => ({
listingId: listing.id,
url,
sortOrder: index,
@@ -83,6 +142,20 @@ export async function importListingAction(formData: FormData) {
});
}
+ // Save sleeping options
+ if (scrapedData?.sleepingOptions?.length) {
+ await prisma.listingSleepingOption.createMany({
+ data: scrapedData.sleepingOptions.map(opt => ({
+ listingId: listing.id,
+ bedType: opt.bedType,
+ quantity: opt.quantity,
+ spotsPerUnit: opt.spotsPerUnit,
+ quality: opt.quality,
+ label: opt.label || null,
+ })),
+ });
+ }
+
revalidatePath("/dashboard");
revalidatePath("/listings");
diff --git a/src/app/(protected)/admin/import/import-form.tsx b/src/app/(protected)/admin/import/import-form.tsx
index 44797ad..330aeac 100644
--- a/src/app/(protected)/admin/import/import-form.tsx
+++ b/src/app/(protected)/admin/import/import-form.tsx
@@ -4,10 +4,14 @@ import { useState } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { importListingAction } from "@/actions/import-listing";
export function ImportForm() {
const [url, setUrl] = useState("");
+ const [checkIn, setCheckIn] = useState("");
+ const [checkOut, setCheckOut] = useState("");
+ const [adults, setAdults] = useState("4");
const [error, setError] = useState("");
const [success, setSuccess] = useState(false);
const [isLoading, setIsLoading] = useState(false);
@@ -20,6 +24,9 @@ export function ImportForm() {
const formData = new FormData();
formData.append("airbnbUrl", url);
+ if (checkIn) formData.append("checkIn", checkIn);
+ if (checkOut) formData.append("checkOut", checkOut);
+ if (adults) formData.append("adults", adults);
const result = await importListingAction(formData);
@@ -33,25 +40,82 @@ export function ImportForm() {
setIsLoading(false);
};
+ // Get today's date for min date
+ const today = new Date().toISOString().split('T')[0];
+
return (
-
+
+
+ 🏠 Neues Airbnb importieren
+
+
+
+
+
);
}
diff --git a/src/app/(protected)/admin/listings/[slug]/page.tsx b/src/app/(protected)/admin/listings/[slug]/page.tsx
index 64dba1f..5965e67 100644
--- a/src/app/(protected)/admin/listings/[slug]/page.tsx
+++ b/src/app/(protected)/admin/listings/[slug]/page.tsx
@@ -5,6 +5,7 @@ import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { updateListing, deleteListing, addNote, addTagToListing, removeTagFromListing } from "../actions";
+// Note: actions.ts is in /admin/listings/, so from [slug]/ we go up one level with ../
export default async function EditListingPage({
params,
diff --git a/src/lib/airbnb/index.ts b/src/lib/airbnb/index.ts
new file mode 100644
index 0000000..8550ffa
--- /dev/null
+++ b/src/lib/airbnb/index.ts
@@ -0,0 +1,207 @@
+import * as cheerio from "cheerio";
+import { normalizeAirbnbUrlWithContext } from "./url-normalizer";
+import { parseCapacityFacts, parseRating, parseHost, parseMaxGuests, extractVisibleText } from "./parsers/text-patterns";
+import { parseSleepingArrangements, calculateSleepingStats, deriveSleepingFromBeds } from "./parsers/sleeping";
+import { extractPrice } from "./parsers/price";
+import { extractLocation } from "./parsers/location";
+import { parseJsonLd } from "./parsers/jsonld";
+import {
+ ExtractedListing,
+ FieldSource,
+ field,
+ mergeField,
+ TripContext,
+ SleepingDataQuality,
+ PriceStatus
+} from "./types";
+
+// ============================================
+// Main Scraper Function
+// ============================================
+
+/**
+ * Scrape one Airbnb listing page and assemble an `ExtractedListing`.
+ *
+ * Steps: normalize the URL, fetch the HTML, run the JSON-LD, text-pattern,
+ * price and location parsers, merge their outputs field by field, then
+ * compute the sleeping statistics.
+ *
+ * @param url - Listing URL as supplied by the caller.
+ * @param options - Optional overrides. Values in `tripContext` win over the
+ *   ones parsed from the URL. `usePlaywright` is accepted but not read here.
+ * @returns The extracted listing, or `null` when any step throws (the error
+ *   is logged with `console.error`).
+ */
+export async function scrapeAirbnbListing(
+  url: string,
+  options?: { tripContext?: TripContext; usePlaywright?: boolean }
+): Promise<ExtractedListing | null> {
+  // Only finite numbers are usable coordinates. A plain truthiness test would
+  // throw away a legitimate 0 and is easy to get wrong with NaN.
+  const isCoordinate = (v: number | null): v is number =>
+    v !== null && Number.isFinite(v);
+
+  try {
+    // Step 1: Normalize URL and extract trip context
+    const normalized = normalizeAirbnbUrlWithContext(url);
+
+    // Explicit options win over what the URL carried; adults falls back to 4.
+    const tripContext: TripContext = {
+      checkIn: options?.tripContext?.checkIn || normalized.tripContext.checkIn,
+      checkOut: options?.tripContext?.checkOut || normalized.tripContext.checkOut,
+      adults: options?.tripContext?.adults || normalized.tripContext.adults || 4,
+    };
+
+    // Step 2: Fetch HTML
+    const html = await fetchHtml(normalized.normalized);
+    const $ = cheerio.load(html);
+
+    // Step 3: Extract visible text for pattern matching
+    const visibleText = extractVisibleText(html);
+
+    // Step 4: Run all parsers
+    const jsonldData = parseJsonLd($);
+    const capacityFacts = parseCapacityFacts(visibleText);
+    const ratingFacts = parseRating(visibleText);
+    const hostName = parseHost(visibleText);
+    const maxGuests = parseMaxGuests(visibleText);
+    const sleepingOptions = parseSleepingArrangements(visibleText);
+    const priceData = extractPrice(html, $, tripContext);
+    const locationData = extractLocation($, html);
+
+    // Step 5: Build the result.
+    // NOTE(review): mergeField is assumed to prefer its first non-null
+    // argument; confirm against ./types.
+    const result: ExtractedListing = {
+      // URLs
+      originalUrl: normalized.original,
+      normalizedUrl: normalized.normalized,
+      externalId: normalized.externalId,
+
+      // Basic Info
+      title: mergeField(
+        jsonldData.title ? field(jsonldData.title, 'jsonld', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+      description: mergeField(
+        jsonldData.description ? field(jsonldData.description, 'jsonld', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+
+      // Location: the JSON-LD address was parsed but previously never used.
+      locationText: mergeField(
+        jsonldData.locationText ? field(jsonldData.locationText, 'jsonld', 'high') : null,
+        locationData.locationText
+      ),
+      latitude: mergeField(
+        isCoordinate(jsonldData.latitude) ? field(jsonldData.latitude, 'jsonld', 'high') : null,
+        locationData.latitude.value !== null ? locationData.latitude : field(null, 'derived', 'low')
+      ),
+      longitude: mergeField(
+        isCoordinate(jsonldData.longitude) ? field(jsonldData.longitude, 'jsonld', 'high') : null,
+        locationData.longitude.value !== null ? locationData.longitude : field(null, 'derived', 'low')
+      ),
+
+      // Pricing
+      tripContext,
+      nightlyPrice: priceData.nightly,
+      totalPrice: priceData.total,
+      priceStatus: priceData.status,
+
+      // Rating
+      rating: mergeField(
+        ratingFacts ? field(ratingFacts.rating, 'text_pattern', 'high') : null,
+        jsonldData.rating ? field(jsonldData.rating, 'jsonld', 'medium') : null
+      ),
+      reviewCount: mergeField(
+        ratingFacts && ratingFacts.reviewCount > 0 ? field(ratingFacts.reviewCount, 'text_pattern', 'high') : null,
+        jsonldData.reviewCount ? field(jsonldData.reviewCount, 'jsonld', 'medium') : null
+      ),
+
+      // Capacity
+      guestCount: mergeField(
+        capacityFacts ? field(capacityFacts.guests, 'text_pattern', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+      officialGuestCount: mergeField(
+        maxGuests ? field(maxGuests, 'text_pattern', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+      bedrooms: mergeField(
+        capacityFacts ? field(capacityFacts.bedrooms, 'text_pattern', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+      beds: mergeField(
+        capacityFacts ? field(capacityFacts.beds, 'text_pattern', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+      bathrooms: mergeField(
+        capacityFacts ? field(capacityFacts.bathrooms, 'text_pattern', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+
+      // Sleeping: placeholders, overwritten in step 6 when data is available
+      sleepingOptions,
+      maxSleepingPlaces: 0,
+      suitableFor4: false,
+      extraMattressesNeededFor4: 0,
+      sleepingDataQuality: 'UNKNOWN',
+
+      // Host
+      hostName: mergeField(
+        hostName ? field(hostName, 'text_pattern', 'high') : null,
+        jsonldData.hostName ? field(jsonldData.hostName, 'jsonld', 'medium') : null
+      ),
+
+      // Amenities
+      amenities: jsonldData.amenities || [],
+
+      // Images
+      images: jsonldData.images || [],
+      coverImage: jsonldData.images?.[0] || null,
+
+      // Other: the JSON-LD cancellation policy was parsed but previously dropped.
+      cancellationPolicy: mergeField(
+        jsonldData.cancellationPolicy ? field(jsonldData.cancellationPolicy, 'jsonld', 'high') : null,
+        field(null, 'derived', 'low')
+      ),
+
+      // Debug
+      rawSnippets: {
+        title: jsonldData.title || '',
+        visibleText: visibleText.substring(0, 2000),
+      },
+      extractionLog: [
+        `URL normalized: ${normalized.normalized}`,
+        `External ID: ${normalized.externalId}`,
+        `Trip context: ${JSON.stringify(tripContext)}`,
+        `Capacity facts: ${capacityFacts ? JSON.stringify(capacityFacts) : 'none'}`,
+        `Rating facts: ${ratingFacts ? JSON.stringify(ratingFacts) : 'none'}`,
+        `Sleeping options: ${sleepingOptions.length} found`,
+      ],
+    };
+
+    // Step 6: Calculate sleeping stats
+    if (sleepingOptions.length > 0) {
+      const stats = calculateSleepingStats(sleepingOptions);
+      result.maxSleepingPlaces = stats.maxSleepingPlaces;
+      result.suitableFor4 = stats.suitableFor4;
+      result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
+      result.sleepingDataQuality = 'EXACT';
+    } else if (result.beds.value && result.guestCount.value) {
+      // No explicit bed list: estimate one from the bed and guest counts.
+      const derivedOptions = deriveSleepingFromBeds(result.beds.value, result.guestCount.value);
+      const stats = calculateSleepingStats(derivedOptions);
+      result.sleepingOptions = derivedOptions;
+      result.maxSleepingPlaces = stats.maxSleepingPlaces;
+      result.suitableFor4 = stats.suitableFor4;
+      result.extraMattressesNeededFor4 = stats.extraMattressesNeededFor4;
+      result.sleepingDataQuality = 'DERIVED';
+    }
+
+    return result;
+  } catch (error) {
+    // Best effort: callers treat null as "nothing could be scraped".
+    console.error("Scraping failed:", error);
+    return null;
+  }
+}
+
+// ============================================
+// HTML Fetcher
+// ============================================
+
+/**
+ * Download the HTML of a listing page using browser-like request headers.
+ *
+ * @param url - Absolute URL to fetch.
+ * @returns The response body as text.
+ * @throws Error when the response status is not OK, and whatever `fetch`
+ *   rejects with on network failure or when the timeout aborts the request.
+ */
+async function fetchHtml(url: string): Promise<string> {
+  // Upper bound for the whole request so a stalled response cannot hang the caller.
+  const timeoutMs = 15_000;
+
+  const response = await fetch(url, {
+    headers: {
+      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+      "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
+      "Accept-Encoding": "gzip, deflate, br",
+      "Cache-Control": "no-cache",
+    },
+    signal: AbortSignal.timeout(timeoutMs),
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP ${response.status} for ${url}`);
+  }
+
+  return response.text();
+}
+
+// Re-export utilities for backward compatibility
+export { normalizeAirbnbUrlWithContext as normalizeAirbnbUrl } from "./url-normalizer";
+export { extractAirbnbExternalId } from "./url-normalizer";
diff --git a/src/lib/airbnb/parsers/jsonld.ts b/src/lib/airbnb/parsers/jsonld.ts
new file mode 100644
index 0000000..1cf75b4
--- /dev/null
+++ b/src/lib/airbnb/parsers/jsonld.ts
@@ -0,0 +1,125 @@
+import * as cheerio from 'cheerio';
+
+/** Values read from a listing's JSON-LD node; every field is empty when absent. */
+export interface JsonLdData {
+  title: string | null;
+  description: string | null;
+  /** Locality, region and country joined with ", ". */
+  locationText: string | null;
+  latitude: number | null;
+  longitude: number | null;
+  rating: number | null;
+  reviewCount: number | null;
+  images: string[];
+  cancellationPolicy: string | null;
+  hostName: string | null;
+  amenities: string[];
+}
+
+/** `@type` values that identify the listing node. */
+const LODGING_TYPES = ['LodgingBusiness', 'Room'];
+
+type JsonObject = Record<string, unknown>;
+
+/** Narrow an unknown value to a plain (non-array) object. */
+function asObject(value: unknown): JsonObject | null {
+  return typeof value === 'object' && value !== null && !Array.isArray(value)
+    ? (value as JsonObject)
+    : null;
+}
+
+/** Return the value when it is a non-blank string, otherwise null. */
+function asText(value: unknown): string | null {
+  return typeof value === 'string' && value.trim() !== '' ? value : null;
+}
+
+/** Accept numbers and numeric strings; reject NaN and infinities. */
+function asFiniteNumber(value: unknown): number | null {
+  const parsed =
+    typeof value === 'number' ? value : typeof value === 'string' ? parseFloat(value) : NaN;
+  return Number.isFinite(parsed) ? parsed : null;
+}
+
+/** Expand top-level arrays and `@graph` containers into a flat node list. */
+function flattenJsonLd(parsed: unknown): JsonObject[] {
+  if (Array.isArray(parsed)) {
+    return parsed.flatMap(item => flattenJsonLd(item));
+  }
+  const node = asObject(parsed);
+  if (!node) return [];
+  const graph = node['@graph'];
+  return Array.isArray(graph) ? [node, ...graph.flatMap(item => flattenJsonLd(item))] : [node];
+}
+
+/** True when `@type` (a string or an array of strings) names a lodging type. */
+function isLodgingNode(node: JsonObject): boolean {
+  const type = node['@type'];
+  const types: unknown[] = Array.isArray(type) ? type : [type];
+  return types.some(t => typeof t === 'string' && LODGING_TYPES.includes(t));
+}
+
+/** Resolve an image entry (URL string or object with `url` / `@id`) to a URL. */
+function imageUrl(image: unknown): string | null {
+  if (typeof image === 'string') return asText(image);
+  const obj = asObject(image);
+  return obj ? asText(obj.url) ?? asText(obj['@id']) : null;
+}
+
+/**
+ * Parse JSON-LD structured data from the page.
+ *
+ * Every `application/ld+json` script is inspected, including top-level arrays
+ * and `@graph` containers, and the first node typed `LodgingBusiness` or
+ * `Room` is used. A script that fails to parse is logged and skipped.
+ *
+ * @param $ - Cheerio handle for the loaded page.
+ * @returns The extracted fields; all empty when no lodging node is found.
+ */
+export function parseJsonLd($: cheerio.CheerioAPI): JsonLdData {
+  const result: JsonLdData = {
+    title: null,
+    description: null,
+    locationText: null,
+    latitude: null,
+    longitude: null,
+    rating: null,
+    reviewCount: null,
+    images: [],
+    cancellationPolicy: null,
+    hostName: null,
+    amenities: [],
+  };
+
+  const nodes: JsonObject[] = [];
+  for (const script of $('script[type="application/ld+json"]').toArray()) {
+    const raw = $(script).html();
+    if (!raw) continue;
+    try {
+      nodes.push(...flattenJsonLd(JSON.parse(raw)));
+    } catch (error) {
+      // One malformed script must not hide a valid one elsewhere on the page.
+      console.error('Failed to parse JSON-LD:', error);
+    }
+  }
+
+  const listing = nodes.find(isLodgingNode);
+  if (!listing) {
+    return result;
+  }
+
+  result.title = asText(listing.name);
+  result.description = asText(listing.description);
+
+  // Location
+  const address = asObject(listing.address);
+  if (address) {
+    // addressCountry may be a plain string or a nested object carrying `name`.
+    const country = asObject(address.addressCountry);
+    const parts = [
+      address.addressLocality,
+      address.addressRegion,
+      country ? country.name : address.addressCountry,
+    ]
+      .map(part => asText(part))
+      .filter((part): part is string => part !== null);
+
+    if (parts.length > 0) {
+      result.locationText = parts.join(', ');
+    }
+  }
+
+  // Coordinates
+  const geo = asObject(listing.geo);
+  if (geo) {
+    result.latitude = asFiniteNumber(geo.latitude);
+    result.longitude = asFiniteNumber(geo.longitude);
+  }
+
+  // Rating
+  const aggregate = asObject(listing.aggregateRating);
+  if (aggregate) {
+    result.rating = asFiniteNumber(aggregate.ratingValue);
+    const count = asFiniteNumber(aggregate.reviewCount);
+    result.reviewCount = count === null ? null : Math.trunc(count);
+  }
+
+  // Images: a single entry or an array; entries without a URL are dropped
+  // instead of being stringified to "[object Object]".
+  const rawImages: unknown[] =
+    listing.image == null ? [] : Array.isArray(listing.image) ? listing.image : [listing.image];
+  result.images = rawImages
+    .map(image => imageUrl(image))
+    .filter((u): u is string => u !== null);
+
+  // Cancellation policy
+  result.cancellationPolicy = asText(listing.cancellationPolicy);
+
+  // Host name
+  result.hostName = asText(asObject(listing.provider)?.name);
+
+  // Amenities
+  if (Array.isArray(listing.amenityFeature)) {
+    result.amenities = listing.amenityFeature
+      .map((feature: unknown) => asText(asObject(feature)?.name))
+      .filter((name: string | null): name is string => name !== null);
+  }
+
+  return result;
+}
diff --git a/src/lib/airbnb/parsers/location.ts b/src/lib/airbnb/parsers/location.ts
new file mode 100644
index 0000000..6e81337
--- /dev/null
+++ b/src/lib/airbnb/parsers/location.ts
@@ -0,0 +1,118 @@
+import * as cheerio from 'cheerio';
+import { FieldSource } from '../types';
+
+/**
+ * Extract a location label and coordinates from the page.
+ *
+ * Location text is taken from the first source that yields a value:
+ * 1. the location section of the page,
+ * 2. meta tags (`og:locality`, `location`, `og:region`, `og:country-name`),
+ * 3. a "Location: ..." / "Lage: ..." / "Standort: ..." phrase in the raw HTML,
+ * 4. the page title ("... in Berlin · ...").
+ * The JSON-LD address is not handled here.
+ *
+ * Coordinates come from `data-lat`/`data-lng` style attributes, then from
+ * geo meta tags. Only finite numbers are accepted.
+ *
+ * @param $ - Cheerio handle for the loaded page.
+ * @param html - Raw HTML of the same page.
+ * @returns One field per value; `value` is null when nothing was found.
+ *   NOTE(review): the `FieldSource` annotations are reproduced as they appear
+ *   in this diff; any type arguments may have been lost and should be confirmed.
+ */
+export function extractLocation(
+  $: cheerio.CheerioAPI,
+  html: string
+): { locationText: FieldSource; latitude: FieldSource; longitude: FieldSource } {
+  // A capitalised word. The first letter may be an umlaut ("Österreich").
+  const WORD = '[A-ZÄÖÜ][a-zäöüÄÖÜß]+';
+
+  // Parse one coordinate, rejecting missing and non-numeric input.
+  const parseCoordinate = (raw: string | undefined): number | null => {
+    if (raw === undefined) return null;
+    const parsed = parseFloat(raw);
+    return Number.isFinite(parsed) ? parsed : null;
+  };
+
+  let locationText: string | null = null;
+  let locationSource: FieldSource['source'] = 'text_pattern';
+  let latitude: number | null = null;
+  let longitude: number | null = null;
+  let latitudeSource: FieldSource['source'] = 'text_pattern';
+  let longitudeSource: FieldSource['source'] = 'text_pattern';
+
+  // 1. Try "Where you'll be" section
+  const whereSection = $('[data-section-id="LOCATION_DEFAULT"]').text() ||
+    $('section:contains("Where you\'ll be")').text() ||
+    $('section:contains("Lage")').text();
+
+  if (whereSection) {
+    // Looks for "City, Country" style text inside the section.
+    const sectionMatch = whereSection.match(
+      new RegExp(`(${WORD}(?:\\s+${WORD})*,\\s*${WORD})`)
+    );
+    if (sectionMatch) {
+      locationText = sectionMatch[1].trim();
+      locationSource = 'dom';
+    }
+  }
+
+  // 2. Try meta tags
+  if (!locationText) {
+    const locality = $('meta[property="og:locality"]').attr('content') ||
+      $('meta[name="location"]').attr('content');
+    const region = $('meta[property="og:region"]').attr('content');
+    const country = $('meta[property="og:country-name"]').attr('content');
+
+    if (locality) {
+      locationText = [locality, region, country].filter(Boolean).join(', ');
+      locationSource = 'meta';
+    }
+  }
+
+  // 3. Try text patterns like "Location: Berlin, Germany".
+  // The keyword may start in either case, but the captured words must be
+  // capitalised. A blanket /i flag would make [A-Z] match lowercase too and
+  // pick up arbitrary words after "location" or inside words such as "Anlage".
+  if (!locationText) {
+    const labelled = html.match(
+      new RegExp(`\\b(?:[Ll]ocation|[Ll]age|[Ss]tandort)[:\\s]+(${WORD}(?:[\\s,]+${WORD})*)`)
+    );
+    if (labelled) {
+      locationText = labelled[1].trim();
+      locationSource = 'text_pattern';
+    }
+  }
+
+  // 4. Try extracting from title (e.g., "Apartment in Berlin · ...")
+  if (!locationText) {
+    // \b keeps "in" / "am" from matching at the end of another word.
+    const fromTitle = $('title').text().match(
+      new RegExp(`\\b(?:in|bei|am)\\s+(${WORD}(?:\\s+${WORD})?)\\s*[·•]`)
+    );
+    if (fromTitle) {
+      locationText = fromTitle[1].trim();
+      locationSource = 'text_pattern';
+    }
+  }
+
+  // Coordinates, first from data attributes (used only when both parse).
+  const attrLatitude = parseCoordinate(
+    $('[data-lat]').attr('data-lat') || $('[data-latitude]').attr('data-latitude')
+  );
+  const attrLongitude = parseCoordinate(
+    $('[data-lng]').attr('data-lng') || $('[data-longitude]').attr('data-longitude')
+  );
+
+  if (attrLatitude !== null && attrLongitude !== null) {
+    latitude = attrLatitude;
+    longitude = attrLongitude;
+    latitudeSource = 'dom';
+    longitudeSource = 'dom';
+  }
+
+  // Then from meta tags. "geo.position" carries "lat;lng" in one value.
+  if (latitude === null) {
+    const geoPosition = $('meta[name="geo.position"]').attr('content') ||
+      $('meta[property="place:location:latitude"]').attr('content');
+    if (geoPosition) {
+      const parts = geoPosition.split(/[;,]/);
+      latitude = parseCoordinate(parts[0]);
+      if (latitude !== null) latitudeSource = 'meta';
+      if (parts.length >= 2) {
+        const pairedLongitude = parseCoordinate(parts[1]);
+        if (pairedLongitude !== null) {
+          longitude = pairedLongitude;
+          longitudeSource = 'meta';
+        }
+      }
+    }
+  }
+
+  if (longitude === null) {
+    longitude = parseCoordinate($('meta[property="place:location:longitude"]').attr('content'));
+    if (longitude !== null) longitudeSource = 'meta';
+  }
+
+  return {
+    locationText: {
+      value: locationText,
+      source: locationSource,
+      confidence: locationText ? 'medium' : 'low',
+    },
+    latitude: {
+      value: latitude,
+      source: latitudeSource,
+      confidence: latitude !== null ? 'high' : 'low',
+    },
+    longitude: {
+      value: longitude,
+      source: longitudeSource,
+      confidence: longitude !== null ? 'high' : 'low',
+    },
+  };
+}
diff --git a/src/lib/airbnb/parsers/price.ts b/src/lib/airbnb/parsers/price.ts
new file mode 100644
index 0000000..9d81e27
--- /dev/null
+++ b/src/lib/airbnb/parsers/price.ts
@@ -0,0 +1,102 @@
+import * as cheerio from 'cheerio';
+import { FieldSource, PriceStatus, TripContext } from '../types';
+import { parsePriceFromText } from './text-patterns';
+
+/**
+ * Look for a nightly price in the page.
+ *
+ * Each candidate selector is tried in order and the text of its first match
+ * is handed to `parsePriceFromText`. When none yields a price, the raw HTML
+ * is scanned as a last resort.
+ *
+ * @param html - Raw HTML of the page.
+ * @param $ - Cheerio handle for the same page.
+ * @returns The first price found, or null.
+ */
+function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number | null {
+  const candidateSelectors = [
+    '[data-testid="price-amount"]',
+    'span[class*="Price"]',
+    'span[class*="price"]',
+    '[itemprop="price"]',
+    '._1y6k3r2',
+    '._1dss1omb',
+  ];
+
+  for (const candidate of candidateSelectors) {
+    const node = $(candidate).first();
+    if (!node.length) continue;
+
+    const parsed = parsePriceFromText(node.text());
+    if (parsed !== null) {
+      return parsed;
+    }
+  }
+
+  // Nothing matched a selector: scan the whole document instead.
+  return parsePriceFromText(html);
+}
+
+/**
+ * Extract the nightly price and, when the stay dates are known, a total.
+ *
+ * How far the price can be trusted depends on the trip context:
+ * - with check-in and check-out, the price is reported as `EXTRACTED` and the
+ *   total is nightly price times the number of nights;
+ * - without both dates, a found price is reported as `REQUIRES_TRIP_CONTEXT`
+ *   with low confidence and no total;
+ * - when no price is found at all, the status is `UNKNOWN`.
+ *
+ * @param html - Raw HTML of the page.
+ * @param $ - Cheerio handle for the same page.
+ * @param tripContext - Stay dates used to derive the total.
+ * @returns The nightly and total price fields plus the price status.
+ */
+export function extractPrice(
+  html: string,
+  $: cheerio.CheerioAPI,
+  tripContext: TripContext
+): { nightly: FieldSource; total: FieldSource; status: PriceStatus } {
+  const MS_PER_DAY = 1000 * 60 * 60 * 24;
+  const missing = (): FieldSource => ({ value: null, source: 'text_pattern', confidence: 'low' });
+
+  const nightlyAmount = tryExtractPriceFromHtml(html, $);
+  if (nightlyAmount === null) {
+    return { nightly: missing(), total: missing(), status: 'UNKNOWN' };
+  }
+
+  const { checkIn, checkOut } = tripContext;
+  if (!checkIn || !checkOut) {
+    // A price without dates may only be a base or minimum price.
+    return {
+      nightly: { value: nightlyAmount, source: 'text_pattern', confidence: 'low' },
+      total: missing(),
+      status: 'REQUIRES_TRIP_CONTEXT',
+    };
+  }
+
+  // Unparseable dates give NaN nights, which fails the > 0 test below and
+  // leaves the total empty; Date parsing itself never throws.
+  const stayMs = new Date(checkOut).getTime() - new Date(checkIn).getTime();
+  const nights = Math.round(stayMs / MS_PER_DAY);
+  const totalAmount = nights > 0 ? nightlyAmount * nights : null;
+
+  return {
+    nightly: { value: nightlyAmount, source: 'text_pattern', confidence: 'medium' },
+    total: totalAmount === null
+      ? missing()
+      : { value: totalAmount, source: 'derived', confidence: 'medium' },
+    status: 'EXTRACTED',
+  };
+}
diff --git a/src/lib/airbnb/parsers/sleeping.ts b/src/lib/airbnb/parsers/sleeping.ts
new file mode 100644
index 0000000..87175a1
--- /dev/null
+++ b/src/lib/airbnb/parsers/sleeping.ts
@@ -0,0 +1,143 @@
+import { BedType, SleepingOption } from '../types';
+
+/** What one bed label maps to. */
+type BedTypeConfig = { type: BedType; spots: number; quality: 'FULL' | 'AUXILIARY' };
+
+/**
+ * Bed type configuration: maps a lowercase English or German bed label to
+ * its bed type, sleeping spots per unit, and quality.
+ */
+export const BED_TYPE_CONFIG: Record<string, BedTypeConfig> = {
+  'double bed': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
+  'doppelbett': { type: 'DOUBLE', spots: 2, quality: 'FULL' },
+  'queen bed': { type: 'QUEEN', spots: 2, quality: 'FULL' },
+  'king bed': { type: 'KING', spots: 2, quality: 'FULL' },
+  'single bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
+  'twin bed': { type: 'SINGLE', spots: 1, quality: 'FULL' },
+  'einzelbett': { type: 'SINGLE', spots: 1, quality: 'FULL' },
+  'bunk bed': { type: 'BUNK', spots: 2, quality: 'FULL' },
+  'etagenbett': { type: 'BUNK', spots: 2, quality: 'FULL' },
+  'sofa bed': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
+  'pull-out sofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
+  'schlafsofa': { type: 'SOFA_BED', spots: 2, quality: 'FULL' },
+  'couch': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
+  'sofa': { type: 'SOFA', spots: 1, quality: 'AUXILIARY' },
+  'air mattress': { type: 'AIR_MATTRESS', spots: 1, quality: 'AUXILIARY' },
+  'luftmatratze': { type: 'AIR_MATTRESS', spots: 1, quality: 'AUXILIARY' },
+  'floor mattress': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
+  'extra mattress': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
+  'zusatzmatratze': { type: 'EXTRA_MATTRESS', spots: 1, quality: 'AUXILIARY' },
+  'futon': { type: 'FUTON', spots: 1, quality: 'AUXILIARY' },
+};
+
+/** Escape a literal string for use inside a regular expression. */
+const escapeRegExp = (literal: string): string =>
+  literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+/** Collapse runs of whitespace and hyphens so "sofa-bed" equals "sofa bed". */
+const normalizeBedLabel = (label: string): string => label.replace(/[\s-]+/g, ' ');
+
+/** Lookup from a normalized label to its original label and config. */
+const BED_CONFIG_BY_LABEL = new Map(
+  Object.entries(BED_TYPE_CONFIG).map(
+    ([label, config]) => [normalizeBedLabel(label), { label, config }] as const
+  )
+);
+
+// Matches "<count> <known bed label>" with an optional plural ending, e.g.
+// "1 double bed", "2 single beds", "2 doppelbetten". Labels are tried longest
+// first so "sofa bed" wins over "sofa". Only whole known labels match, so
+// phrases such as "24 h" or "2 to 4 guests" are never read as beds.
+const BED_PATTERN = new RegExp(
+  '(?<![\\d.,])(\\d+)\\s+(' +
+    Object.keys(BED_TYPE_CONFIG)
+      .sort((a, b) => b.length - a.length)
+      .map(label => label.split(/[\s-]+/).map(escapeRegExp).join('[\\s-]+'))
+      .join('|') +
+    ')(?:es|s|en|n)?(?![a-zäöüß])',
+  'g'
+);
+
+export interface SleepingStats {
+  maxSleepingPlaces: number;
+  suitableFor4: boolean;
+  extraMattressesNeededFor4: number;
+}
+
+/**
+ * Parse sleeping arrangements from free text.
+ *
+ * Recognises a count followed by a label from `BED_TYPE_CONFIG`, for example:
+ * - "1 double bed"
+ * - "2 single beds"
+ * - "Bedroom 1: 1 queen bed"
+ * - "Common space: 1 sofa bed"
+ *
+ * Matching is case-insensitive. Several mentions of the same bed type are
+ * merged into one option with the quantities added up; the merged option
+ * keeps the label and raw text of its first mention.
+ *
+ * @param text - Visible page text.
+ * @returns One option per bed type found, in order of first appearance.
+ */
+export function parseSleepingArrangements(text: string): SleepingOption[] {
+  const options: SleepingOption[] = [];
+  const lowerText = text.toLowerCase();
+
+  // matchAll iterates over a copy of the regex, so the shared pattern's
+  // lastIndex is never left in a dirty state between calls.
+  for (const match of lowerText.matchAll(BED_PATTERN)) {
+    const quantity = parseInt(match[1], 10);
+    const known = BED_CONFIG_BY_LABEL.get(normalizeBedLabel(match[2]));
+    if (!known || quantity <= 0) continue;
+
+    const existing = options.find(o => o.bedType === known.config.type);
+    if (existing) {
+      existing.quantity += quantity;
+    } else {
+      options.push({
+        bedType: known.config.type,
+        quantity,
+        spotsPerUnit: known.config.spots,
+        quality: known.config.quality,
+        label: known.label,
+        rawText: match[0].trim(),
+      });
+    }
+  }
+
+  return options;
+}
+
+/**
+ * Summarise a list of sleeping options.
+ *
+ * @param options - Sleeping options to total up.
+ * @returns `maxSleepingPlaces` is the sum of quantity times spots per unit
+ *   over all options; `suitableFor4` is true when that sum is at least 4;
+ *   `extraMattressesNeededFor4` is how many spots short of 4 the FULL-quality
+ *   options are (never negative).
+ */
+export function calculateSleepingStats(options: SleepingOption[]): SleepingStats {
+  let allSpots = 0;
+  let fullQualitySpots = 0;
+
+  for (const { quantity, spotsPerUnit, quality } of options) {
+    const spots = quantity * spotsPerUnit;
+    allSpots += spots;
+    // Auxiliary places (sofa, air mattress, ...) do not count as proper beds.
+    if (quality === 'FULL') {
+      fullQualitySpots += spots;
+    }
+  }
+
+  return {
+    maxSleepingPlaces: allSpots,
+    suitableFor4: allSpots >= 4,
+    extraMattressesNeededFor4: Math.max(0, 4 - fullQualitySpots),
+  };
+}
+
+/**
+ * Estimate sleeping options from the bed count alone. This is a fallback for
+ * pages that give no detailed sleeping arrangement.
+ *
+ * All beds are taken to be doubles when there are at least 1.5 guests per
+ * bed (or when the guest count is missing), otherwise singles.
+ *
+ * @param beds - Number of beds; values below 1 produce an empty list.
+ * @param guestCount - Number of guests; a falsy value assumes 2 per bed.
+ * @returns A single-entry list, or an empty list when `beds` is unusable.
+ */
+export function deriveSleepingFromBeds(beds: number, guestCount: number): SleepingOption[] {
+  if (!beds || beds < 1) return [];
+
+  const guestsPerBed = guestCount ? guestCount / beds : 2;
+  const assumeDoubles = guestsPerBed >= 1.5;
+
+  return [{
+    bedType: assumeDoubles ? 'DOUBLE' : 'SINGLE',
+    quantity: beds,
+    spotsPerUnit: assumeDoubles ? 2 : 1,
+    quality: 'FULL',
+    label: assumeDoubles ? 'double bed (derived)' : 'single bed (derived)',
+  }];
+}
diff --git a/src/lib/airbnb/parsers/text-patterns.ts b/src/lib/airbnb/parsers/text-patterns.ts
new file mode 100644
index 0000000..098d13f
--- /dev/null
+++ b/src/lib/airbnb/parsers/text-patterns.ts
@@ -0,0 +1,123 @@
+/**
+ * Text pattern parsers for extracting data from visible HTML text.
+ * Supports both German and English patterns.
+ */
+
+// "2 guests · 1 bedroom · 2 beds · 1 bath" or German variants, including the
+// singular forms "1 Gast", "1 Bett" and "1 Bad". The unit words are matched
+// as prefixes, so "Badezimmer" and "bathrooms" are covered as well.
+const CAPACITY_PATTERN = /(\d+)\s*(?:guests?|gast|gäste?)\s*[·•]\s*(\d+)\s*(?:bedrooms?|schlafzimmer?)\s*[·•]\s*(\d+)\s*(?:beds?|bett(?:en)?)\s*[·•]\s*(\d+(?:[.,]\d+)?)\s*(?:baths?|bäder?|bad)/i;
+
+// "4.88 · 200 reviews" or "4,88 (200)" or "4,88 · 200 Bewertungen"
+const RATING_PATTERN = /(\d+[.,]\d+)\s*(?:[·•\(]?\s*(\d+)\s*(?:reviews?|bewertungen)?\)?)/i;
+
+// "Hosted by David" or "Gehostet von David"
+const HOST_PATTERN = /(?:hosted by|gehostet von)\s+([^\n·•]+)/i;
+
+// "€ 150 / night" or "$150 per night" or "150 € pro Nacht". The amount may
+// use "." or "," as thousands separator ("1.200 €", "$1,250.50"); the
+// lookbehind stops a match from starting in the middle of a longer number.
+const PRICE_PATTERN = /[€$]?\s*(?<![\d.,])(\d{1,3}(?:[.,]\d{3})+(?:[.,]\d{0,2})?|\d+(?:[.,]\d{0,2})?)\s*[€$]?\s*(?:\/|per|pro)\s*(?:night|nacht)/i;
+
+// "6 guests maximum" or "max. 6 Gäste" or "Up to 6 guests"
+const MAX_GUESTS_PATTERN = /(?:max\.?|maximum|up to)\s*(\d+)\s*(?:guests?|gast|gäste?)|(\d+)\s*(?:guests?|gast|gäste?)\s*(?:maximum|max\.?)/i;
+
+export interface CapacityFacts {
+  guests: number;
+  bedrooms: number;
+  beds: number;
+  bathrooms: number;
+}
+
+export interface RatingFacts {
+  rating: number;
+  reviewCount: number;
+}
+
+/**
+ * Convert an amount written with "." or "," separators into a number.
+ * A final group of exactly three digits is read as a thousands group;
+ * anything shorter after the last separator is read as the decimal part.
+ */
+function parseLocalizedAmount(raw: string): number {
+  const lastSeparator = Math.max(raw.lastIndexOf('.'), raw.lastIndexOf(','));
+  if (lastSeparator === -1) return parseFloat(raw);
+
+  const withoutSeparators = (s: string): string => s.replace(/[.,]/g, '');
+  const tail = raw.slice(lastSeparator + 1);
+  if (tail.length === 3) {
+    return parseFloat(withoutSeparators(raw));
+  }
+  return parseFloat(`${withoutSeparators(raw.slice(0, lastSeparator))}.${tail}`);
+}
+
+/**
+ * Parse capacity facts from text like "2 guests · 1 bedroom · 2 beds · 1 bath".
+ *
+ * @returns The four counts, or null when the text has no such line.
+ */
+export function parseCapacityFacts(text: string): CapacityFacts | null {
+  const match = text.match(CAPACITY_PATTERN);
+  if (!match) return null;
+
+  return {
+    guests: parseInt(match[1], 10),
+    bedrooms: parseInt(match[2], 10),
+    beds: parseInt(match[3], 10),
+    // Half bathrooms appear as "1.5" or, in German, "1,5".
+    bathrooms: parseFloat(match[4].replace(',', '.')),
+  };
+}
+
+/**
+ * Parse rating from text like "4.88 · 200 reviews".
+ *
+ * @returns The rating and review count of the first match, or null when
+ *   nothing matches or the rating is not a number.
+ */
+export function parseRating(text: string): RatingFacts | null {
+  const match = text.match(RATING_PATTERN);
+  if (!match) return null;
+
+  const rating = parseFloat(match[1].replace(',', '.'));
+  const reviewCount = match[2] ? parseInt(match[2], 10) : 0;
+
+  if (isNaN(rating)) return null;
+
+  return { rating, reviewCount };
+}
+
+/**
+ * Parse host name from text like "Hosted by David".
+ *
+ * @returns The trimmed text after the phrase, up to a line break or bullet
+ *   separator, or null when the phrase is absent.
+ */
+export function parseHost(text: string): string | null {
+  const match = text.match(HOST_PATTERN);
+  if (!match) return null;
+
+  return match[1].trim();
+}
+
+/**
+ * Parse a nightly price from text like "€ 150 / night".
+ *
+ * Thousands separators are handled, so "1.200 € pro Nacht" yields 1200 and
+ * "$1,250.50 per night" yields 1250.5.
+ *
+ * @returns The amount of the first match, or null when no price is found.
+ */
+export function parsePriceFromText(text: string): number | null {
+  const match = text.match(PRICE_PATTERN);
+  if (!match) return null;
+
+  const price = parseLocalizedAmount(match[1]);
+  return isNaN(price) ? null : price;
+}
+
+/**
+ * Parse max guests from text like "6 guests maximum".
+ *
+ * @returns The guest limit, or null when the text states none.
+ */
+export function parseMaxGuests(text: string): number | null {
+  const match = text.match(MAX_GUESTS_PATTERN);
+  if (!match) return null;
+
+  // Pattern has two capture groups depending on word order
+  const value = match[1] || match[2];
+  return value ? parseInt(value, 10) : null;
+}
+
+/**
+ * Extract all text content from HTML for pattern matching
+ */
+export function extractVisibleText(html: string): string {
+ // Remove script and style tags
+ let text = html.replace(/