import * as cheerio from 'cheerio';
import { FieldSource, PriceStatus, TripContext } from '../types';
import { parsePriceFromText } from './text-patterns';

/** Number of milliseconds in one 24-hour day; used to convert a timestamp delta into nights. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/**
 * Build the "no value found" field used for every price slot that could not be filled.
 *
 * @returns A fresh `FieldSource` with a `null` value, `text_pattern` source and `low` confidence.
 */
function emptyPriceField(): FieldSource {
  return { value: null, source: 'text_pattern', confidence: 'low' };
}

/**
 * Try to extract a price from HTML using various selectors.
 *
 * Each selector is probed in order; only the first element matching a selector is
 * inspected, and its text is handed to `parsePriceFromText`. If no selector yields a
 * price, the raw HTML string itself is passed to `parsePriceFromText` as a last resort.
 *
 * @param html - Raw HTML of the page, used only for the final fallback scan.
 * @param $ - Cheerio handle used to run the selectors.
 * @returns The first price `parsePriceFromText` reports, or `null` if none is found.
 */
function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number | null {
  // Various price selectors that Airbnb might use, in the order they are tried.
  const priceSelectors = [
    '[data-testid="price-amount"]',
    'span[class*="Price"]',
    'span[class*="price"]',
    '[itemprop="price"]',
    '._1y6k3r2',
    '._1dss1omb',
  ];

  for (const selector of priceSelectors) {
    const element = $(selector).first();
    if (element.length) {
      const price = parsePriceFromText(element.text());
      if (price !== null) {
        return price;
      }
    }
  }

  // Fallback: search the entire HTML for price patterns (yields null when nothing matches).
  return parsePriceFromText(html);
}

/**
 * Extract price with trip context awareness.
 *
 * CRITICAL: Price reliability depends on trip context
 * - With check-in/check-out: Price is for those specific dates
 * - Without trip context: Price may be a base/minimum price
 *
 * Outcomes:
 * - No price found: both fields are empty and status is `UNKNOWN`.
 * - Price found but `checkIn` or `checkOut` is missing: nightly is reported with `low`
 *   confidence, total is empty, status is `REQUIRES_TRIP_CONTEXT`.
 * - Price found with both dates: nightly is reported with `medium` confidence and status
 *   is `EXTRACTED`. The total is `nightly * nights` (source `derived`) when the dates
 *   parse and the stay is at least one night; otherwise the total is left empty.
 *
 * @param html - Raw HTML of the listing page.
 * @param $ - Cheerio handle for the same page.
 * @param tripContext - Trip details; only `checkIn` and `checkOut` are read here.
 * @returns The nightly and total price fields together with an overall status.
 */
export function extractPrice(
  html: string,
  $: cheerio.CheerioAPI,
  tripContext: TripContext
): { nightly: FieldSource; total: FieldSource; status: PriceStatus } {
  // The extraction itself does not depend on trip context, so run it once up front.
  const nightlyPrice = tryExtractPriceFromHtml(html, $);

  if (nightlyPrice === null) {
    return {
      nightly: emptyPriceField(),
      total: emptyPriceField(),
      status: 'UNKNOWN',
    };
  }

  // No trip context = unreliable price
  if (!tripContext.checkIn || !tripContext.checkOut) {
    return {
      nightly: { value: nightlyPrice, source: 'text_pattern', confidence: 'low' },
      total: emptyPriceField(),
      status: 'REQUIRES_TRIP_CONTEXT',
    };
  }

  // `new Date(...)` does not throw on unparseable input; it produces an Invalid Date whose
  // timestamp is NaN. Guard on the computed night count instead of using try/catch.
  const checkInMs = new Date(tripContext.checkIn).getTime();
  const checkOutMs = new Date(tripContext.checkOut).getTime();
  // Math.round absorbs sub-day drift (e.g. a 23h or 25h day across a DST change).
  const nights = Math.round((checkOutMs - checkInMs) / MS_PER_DAY);
  const total = Number.isFinite(nights) && nights > 0 ? nightlyPrice * nights : null;

  return {
    nightly: { value: nightlyPrice, source: 'text_pattern', confidence: 'medium' },
    total:
      total !== null
        ? { value: total, source: 'derived', confidence: 'medium' }
        : emptyPriceField(),
    status: 'EXTRACTED',
  };
}