- Add field() and mergeField() helper functions to types.ts
- Fix location parser to use correct html parameter
- Add priceStatus to import action
- Import form already has trip context fields (checkIn, checkOut, adults)
- Build now passes successfully
103 lines
3.0 KiB
TypeScript
103 lines
3.0 KiB
TypeScript
import * as cheerio from 'cheerio';
|
|
import { FieldSource, PriceStatus, TripContext } from '../types';
|
|
import { parsePriceFromText } from './text-patterns';
|
|
|
|
/**
 * Try to extract a price from the page, first through a list of known price
 * selectors and then by scanning the raw HTML string.
 *
 * Selectors are tried in the order listed. For each selector only the first
 * matching element is inspected; if its text does not parse as a price, the
 * next selector is tried.
 *
 * @param html - Raw HTML of the page; passed to `parsePriceFromText` as a last resort.
 * @param $ - Cheerio handle used for the selector lookups (presumably loaded
 *   from the same `html` — confirm at the call site).
 * @returns The first price `parsePriceFromText` recognises, or `null` when
 *   neither the selectors nor the raw HTML yield one.
 */
function tryExtractPriceFromHtml(html: string, $: cheerio.CheerioAPI): number | null {
  // Price selectors that Airbnb might use, tried in this order.
  const priceSelectors = [
    '[data-testid="price-amount"]',
    'span[class*="Price"]',
    'span[class*="price"]',
    '[itemprop="price"]',
    // NOTE(review): these look like generated class names and may go stale — confirm.
    '._1y6k3r2',
    '._1dss1omb',
  ];

  for (const selector of priceSelectors) {
    const element = $(selector).first();
    if (!element.length) {
      continue;
    }

    const price = parsePriceFromText(element.text());
    if (price !== null) {
      return price;
    }
  }

  // Fallback: search the entire HTML string (markup included) for price patterns.
  return parsePriceFromText(html);
}
|
|
|
|
/**
 * Extract the nightly price and, when possible, the total price for the stay.
 *
 * CRITICAL: price reliability depends on trip context.
 * - With check-in and check-out: the extracted price is reported as the
 *   nightly rate for those dates (`EXTRACTED`, medium confidence) and the
 *   total is derived as nightly × number of nights.
 * - Without trip context: the price may be a base/minimum price, so it is
 *   reported with low confidence and status `REQUIRES_TRIP_CONTEXT`.
 * - No price found at all: both values are `null` and the status is `UNKNOWN`.
 *
 * @param html - Raw HTML of the page, forwarded to `tryExtractPriceFromHtml`.
 * @param $ - Cheerio handle, forwarded to `tryExtractPriceFromHtml`.
 * @param tripContext - Trip details; only `checkIn` and `checkOut` are read here.
 * @returns Nightly and total price fields with their source and confidence,
 *   plus an overall price status.
 */
export function extractPrice(
  html: string,
  $: cheerio.CheerioAPI,
  tripContext: TripContext
): { nightly: FieldSource<number | null>; total: FieldSource<number | null>; status: PriceStatus } {
  const extracted = tryExtractPriceFromHtml(html, $);

  // Nothing found: the result is the same with or without trip context.
  if (extracted === null) {
    return {
      nightly: { value: null, source: 'text_pattern', confidence: 'low' },
      total: { value: null, source: 'text_pattern', confidence: 'low' },
      status: 'UNKNOWN',
    };
  }

  // No trip context = unreliable price, and no way to derive a total.
  if (!tripContext.checkIn || !tripContext.checkOut) {
    return {
      nightly: { value: extracted, source: 'text_pattern', confidence: 'low' },
      total: { value: null, source: 'text_pattern', confidence: 'low' },
      status: 'REQUIRES_TRIP_CONTEXT',
    };
  }

  // `new Date(...)` does not throw on an unparseable value; it yields an
  // Invalid Date whose getTime() is NaN. Check for that explicitly instead of
  // relying on a try/catch that would never fire.
  const msPerDay = 1000 * 60 * 60 * 24;
  const stayMs = new Date(tripContext.checkOut).getTime() - new Date(tripContext.checkIn).getTime();
  const nights = Math.round(stayMs / msPerDay);
  const total = Number.isFinite(nights) && nights > 0 ? extracted * nights : null;

  return {
    nightly: { value: extracted, source: 'text_pattern', confidence: 'medium' },
    total:
      total !== null
        ? { value: total, source: 'derived', confidence: 'medium' }
        : { value: null, source: 'text_pattern', confidence: 'low' },
    status: 'EXTRACTED',
  };
}
|