AI 13bbe9d147 fix: add field/mergeField helpers, priceStatus, trip context support
- Add field() and mergeField() helper functions to types.ts
- Fix location parser to use correct html parameter
- Add priceStatus to import action
- Import form already has trip context fields (checkIn, checkOut, adults)
- Build now passes successfully
2026-03-11 15:55:45 +00:00

103 lines
3.0 KiB
TypeScript

import * as cheerio from 'cheerio';
import { FieldSource, PriceStatus, TripContext } from '../types';
import { parsePriceFromText } from './text-patterns';
/**
 * Look for a price in the page, first via targeted CSS selectors and then,
 * if none of them yields a parseable value, in the raw HTML string itself.
 *
 * @param html - Raw HTML of the page; only used for the last-resort scan.
 * @param $ - Cheerio handle used to run the selector queries.
 * @returns The first price `parsePriceFromText` recognises, or `null` when
 *   neither the selectors nor the raw HTML produce one.
 */
function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number | null {
  // Candidate selectors for price markup Airbnb might use, tried in this order.
  const candidateSelectors = [
    '[data-testid="price-amount"]',
    'span[class*="Price"]',
    'span[class*="price"]',
    '[itemprop="price"]',
    '._1y6k3r2',
    '._1dss1omb',
  ];

  for (const candidate of candidateSelectors) {
    // Only the first element matching each selector is inspected.
    const firstMatch = $(candidate).first();
    if (!firstMatch.length) {
      continue;
    }

    const parsed = parsePriceFromText(firstMatch.text());
    if (parsed !== null) {
      return parsed;
    }
  }

  // Last resort: run the text pattern over the whole document.
  return parsePriceFromText(html);
}
/**
 * Extract price with trip context awareness.
 *
 * CRITICAL: Price reliability depends on trip context
 * - With check-in/check-out: Price is for those specific dates
 * - Without trip context: Price may be a base/minimum price
 *
 * Outcomes:
 * - No price found in the page: both fields are `null`, status `UNKNOWN`.
 * - Price found but check-in or check-out is missing: nightly is reported
 *   with low confidence, total is `null`, status `REQUIRES_TRIP_CONTEXT`.
 * - Price found with both dates: nightly is reported with medium confidence,
 *   status `EXTRACTED`. Total is `nightly * nights` when the dates yield a
 *   positive night count, otherwise `null`.
 *
 * @param html - Raw HTML of the page.
 * @param $ - Cheerio handle for the same page.
 * @param tripContext - Trip details; only `checkIn` and `checkOut` are read.
 * @returns Nightly and total price fields plus an overall price status.
 */
export function extractPrice(
  html: string,
  $: cheerio.CheerioAPI,
  tripContext: TripContext
): { nightly: FieldSource<number | null>; total: FieldSource<number | null>; status: PriceStatus } {
  // Every outcome depends on the scraped price, so look it up exactly once.
  const nightlyPrice = tryExtractPriceFromHtml(html, $);

  if (nightlyPrice === null) {
    return {
      nightly: { value: null, source: 'text_pattern', confidence: 'low' },
      total: { value: null, source: 'text_pattern', confidence: 'low' },
      status: 'UNKNOWN',
    };
  }

  // No trip context = unreliable price
  if (!tripContext.checkIn || !tripContext.checkOut) {
    return {
      nightly: { value: nightlyPrice, source: 'text_pattern', confidence: 'low' },
      total: { value: null, source: 'text_pattern', confidence: 'low' },
      status: 'REQUIRES_TRIP_CONTEXT',
    };
  }

  // The Date constructor does not throw on unparsable input; it produces an
  // Invalid Date whose getTime() is NaN. NaN propagates through the arithmetic
  // below and fails the `nights > 0` test, so invalid or reversed dates simply
  // leave the total unset.
  const msPerDay = 1000 * 60 * 60 * 24;
  const checkInMs = new Date(tripContext.checkIn).getTime();
  const checkOutMs = new Date(tripContext.checkOut).getTime();
  const nights = Math.round((checkOutMs - checkInMs) / msPerDay);
  const total = nights > 0 ? nightlyPrice * nights : null;

  return {
    nightly: { value: nightlyPrice, source: 'text_pattern', confidence: 'medium' },
    total:
      total !== null
        ? { value: total, source: 'derived', confidence: 'medium' }
        : { value: null, source: 'text_pattern', confidence: 'low' },
    status: 'EXTRACTED',
  };
}