// Coverage report excerpt for lib/search/cron.ts (Istanbul):
//   Statements: 89.69% (87/97)
//   Branches:   83.33% (75/90)
//   Functions:  60.86% (14/23)
//   Lines:      100%   (73/73)
// The per-line hit-count gutter from the HTML report lost its alignment during
// extraction and is omitted here.

// Scheduled search index rebuild — runs directly via cron (same pattern as blog-reminders.ts).
// Creates DAL from env bindings and rebuilds all MiniSearch indexes into R2.
//
// Dependencies: search-service.ts, index-builder.ts (on develop branch)
// These imports resolve after merging with develop.
 
import { getDb } from "../../db";
import { createDal } from "../../dal";
import {
	businessTypes,
	serviceCategories,
	materialTags,
	cities,
	localities,
	zones,
	projects,
	proServiceCategories,
	proMaterialTags,
	proServiceAreas,
	type BusinessType,
	type ServiceCategory,
	type MaterialTag,
	type City,
	type Locality,
} from "../../db/schema";
import { and, desc, eq, inArray, isNotNull } from "drizzle-orm";
import { queryActiveTaxonomy } from "../../routes/taxonomy/helpers";
import { SearchService } from "./search-service";
import {
	buildProDocuments,
	buildProjectDocuments,
	buildRoomDocuments,
	buildBlogDocuments,
	groupRowsByProId,
	type ProRow,
	type ProjectRow,
	type RoomRow,
	type BlogRow,
} from "./index-builder";
 
/** Kinds of searchable entity; each one gets its own MiniSearch index. */
type EntityType =
	| "pro"
	| "project"
	| "room"
	| "blog";

/** Every entity type, in the order the indexes are rebuilt. */
const ALL_TYPES: Array<EntityType> = [
	"pro",
	"project",
	"room",
	"blog",
];
 
/**
 * Rebuild all MiniSearch indexes and upload to R2.
 * Called by the scheduled handler on an hourly cron.
 *
 * For every entity type in `ALL_TYPES` this builds the search documents,
 * serializes an index with `SearchService.buildIndex`, and writes it to
 * `search-indexes/<type>s.json` in `env.R2`. When `env.KV_CACHE` is bound, the
 * `search-index-version` key is then set to the current timestamp.
 *
 * Any error is caught and logged; it is never rethrown, so the returned
 * promise always resolves. A failure part-way through leaves the indexes
 * already uploaded in place and skips the version bump.
 *
 * @param env - Worker bindings; reads `DB`, `R2` and (optionally) `KV_CACHE`.
 */
export async function rebuildSearchIndexes(env: CloudflareBindings): Promise<void> {
	try {
		const db = getDb(env.DB);
		const dal = createDal(db);

		// Fetch all taxonomy + entity data in parallel
		const [
			rawBusinessTypes,
			rawServiceCategories,
			rawMaterialTags,
			rawCities,
			rawLocalities,
			rawZones,
			rawRoomTypes,
			rawBlogCategories,
			rawPros,
			rawProjects,
			rawBlogs,
			rawProCategories,
			rawProMaterials,
			rawProAreas,
		] = await Promise.all([
			queryActiveTaxonomy<BusinessType>(db, businessTypes),
			queryActiveTaxonomy<ServiceCategory>(db, serviceCategories),
			queryActiveTaxonomy<MaterialTag>(db, materialTags),
			queryActiveTaxonomy<City>(db, cities),
			db.select().from(localities),
			db.select().from(zones),
			dal.roomTypes.findAll(),
			dal.blogCategories.findAll({}, 0, 10000),
			dal.pros.findAll({ status: "published" }, 0, 100000),
			dal.projects.findAll({ status: "published" }, 0, 100000),
			dal.blogs.findAll({ status: "published" }, 0, 100000),
			db.select().from(proServiceCategories),
			db.select().from(proMaterialTags),
			db.select().from(proServiceAreas),
		]);

		// Build taxonomy lookup maps (id/code → display name)
		const businessTypeMap = new Map(rawBusinessTypes.map((bt) => [bt.id, bt.name]));
		const serviceCategoryMap = new Map(rawServiceCategories.map((sc) => [sc.id, sc.name]));
		const materialTagMap = new Map(rawMaterialTags.map((mt) => [mt.id, mt.name]));
		const cityMap = new Map(rawCities.map((c) => [c.id, c.name]));
		const localityMap = new Map((rawLocalities as Locality[]).map((l) => [l.id, l.name]));
		const roomTypeMap = new Map(rawRoomTypes.map((rt) => [rt.code, rt.displayName]));
		const blogCategoryMap = new Map(rawBlogCategories.map((bc) => [bc.id, bc.name]));
		// These two maps are intentionally left empty and passed to the builders as-is.
		const styleTagIdentityMap = new Map<string, string>();
		const roomMaterialIdentityMap = new Map<string, string>();

		// Resolve project city via locality → zone → city so the search index
		// has a `facetCityId` for projects/rooms (used for cityIds filter).
		const zoneToCity = new Map<string, string>();
		for (const z of rawZones) zoneToCity.set(z.id, z.cityId);
		const localityToCity = new Map<string, string>();
		for (const l of rawLocalities as Locality[]) {
			const cid = zoneToCity.get(l.zoneId);
			// Localities whose zone has no known city are simply left unmapped.
			if (cid) localityToCity.set(l.id, cid);
		}

		// Group junction rows into per-pro maps for buildProDocuments.
		const proCategoriesMap = groupRowsByProId(rawProCategories, (r) => r.categoryId);
		const proMaterialsMap = groupRowsByProId(rawProMaterials, (r) => r.tagId);
		const proAreasMap = groupRowsByProId(rawProAreas, (r) => r.localityId);

		// Visibility filtering: only content owned by a published pro is indexed.
		const publishedProIds = new Set(rawPros.map((p) => p.id));
		const proLookup = new Map(rawPros.map((p) => [p.id, p]));
		// Computed once and shared by the "project" and "room" branches below.
		const visibleProjects = rawProjects.filter((p) => publishedProIds.has(p.proId));

		// Fetch cover images for pros
		const allProIds = rawPros.map((p) => p.id);
		const coverPhotos = allProIds.length > 0
			? await db.select({ proId: projects.proId, coverImage: projects.coverImage })
				.from(projects)
				.where(and(
					inArray(projects.proId, allProIds),
					eq(projects.status, "published"),
					isNotNull(projects.coverImage),
				))
				.orderBy(desc(projects.dateCreated))
			: [];

		// Rows arrive ordered by dateCreated descending, so the first row seen
		// for a pro is the cover of its most recently created project.
		const firstCoverByProId = new Map<string, string>();
		for (const row of coverPhotos) {
			if (row.coverImage && !firstCoverByProId.has(row.proId)) {
				firstCoverByProId.set(row.proId, row.coverImage);
			}
		}

		const stats: string[] = [];

		for (const type of ALL_TYPES) {
			let docs: ReturnType<typeof buildProDocuments> = [];

			if (type === "pro") {
				const proRows: ProRow[] = rawPros.map((p) => ({
					id: p.id,
					slug: p.slug ?? p.id,
					businessName: p.businessName,
					description: p.description,
					businessTypeId: p.businessTypeId,
					cityId: p.cityId,
					serviceCategoryIds: proCategoriesMap.get(p.id) ?? [],
					materialTagIds: proMaterialsMap.get(p.id) ?? [],
					serviceAreaIds: proAreasMap.get(p.id) ?? [],
					logoUrl: p.logoUrl,
					profileImage: p.profileImage,
					coverImage: firstCoverByProId.get(p.id) ?? null,
				}));
				docs = buildProDocuments(proRows, {
					businessTypes: businessTypeMap,
					cities: cityMap,
					serviceCategories: serviceCategoryMap,
				});
			} else if (type === "project") {
				const projectRows: ProjectRow[] = visibleProjects.map((p) => {
					const parentPro = proLookup.get(p.proId);
					return {
						id: p.id,
						slug: p.slug ?? p.id,
						title: p.title,
						description: p.description,
						propertyType: p.propertyType,
						localityId: p.localityId,
						cityId: p.localityId ? localityToCity.get(p.localityId) ?? null : null,
						styleTagIds: p.styleTagIds ? JSON.stringify(p.styleTagIds) : null,
						materialTagIds: p.materialTagIds ? JSON.stringify(p.materialTagIds) : null,
						coverImage: p.coverImage,
						proName: parentPro?.businessName ?? null,
						proLogo: parentPro?.logoUrl ?? parentPro?.profileImage ?? null,
						// Quality ranking fields (E3A, E7A). qualityScore is NOT NULL DEFAULT 50
						// in the DB after the marketplace_curation migration (E6B decision).
						proId: p.proId,
						qualityScore: p.qualityScore ?? 50,
					};
				});
				docs = buildProjectDocuments(projectRows, {
					localities: localityMap,
					styleTags: styleTagIdentityMap,
					materialTags: materialTagMap,
				});
			} else if (type === "room") {
				const visibleProjectLookup = new Map(visibleProjects.map((p) => [p.id, p]));
				// Pass no filters — findPublishedRooms now resolves the
				// published-project-of-published-pro constraint server-side
				// via a correlated EXISTS, avoiding an unbounded project-ID
				// bind list that would have hit D1's 100-param limit once the
				// catalog grew.
				const { rooms: rawRooms } = await dal.rooms.findPublishedRooms({}, 1, 100000);

				const allRoomIds = rawRooms.map((r) => r.id);
				const roomMedia = allRoomIds.length > 0 ? await dal.media.findByRoomIds(allRoomIds) : [];
				// The first media row seen for a room is the fallback cover; any row
				// flagged isCover overrides it (the last flagged row wins).
				const roomCoverMap = new Map<number, string>();
				for (const m of roomMedia) {
					if (!roomCoverMap.has(m.roomId)) roomCoverMap.set(m.roomId, m.storageKey);
					if (m.isCover) roomCoverMap.set(m.roomId, m.storageKey);
				}

				const roomRows: RoomRow[] = rawRooms.map((r) => {
					const parentProject = visibleProjectLookup.get(r.projectId);
					const parentPro = parentProject ? proLookup.get(parentProject.proId) : undefined;
					const localityId = parentProject?.localityId ?? null;
					return {
						id: String(r.id),
						slug: r.slug ?? String(r.id),
						name: r.name,
						description: r.description,
						roomType: r.roomType,
						styleTags: r.styleTags ? JSON.stringify(r.styleTags) : null,
						materials: r.materials ? JSON.stringify(r.materials) : null,
						coverImage: roomCoverMap.get(r.id) ?? null,
						proName: parentPro?.businessName ?? null,
						projectTitle: parentProject?.title ?? null,
						proLogo: parentPro?.logoUrl ?? parentPro?.profileImage ?? null,
						localityId,
						cityId: localityId ? localityToCity.get(localityId) ?? null : null,
						// Quality ranking fields (E3A, E7A). Rooms inherit qualityScore and
						// proId from their parent project so the search service can apply
						// per-pro diversity quota across both entityType=project and room.
						proId: parentProject?.proId ?? "",
						qualityScore: parentProject?.qualityScore ?? 50,
					};
				});
				docs = buildRoomDocuments(roomRows, {
					roomTypes: roomTypeMap,
					styleTags: styleTagIdentityMap,
					materialTags: roomMaterialIdentityMap,
				});
			} else if (type === "blog") {
				// Editorial posts are always visible; other posts only when they are
				// attributed to a published pro.
				const visibleBlogs = rawBlogs.filter((b) => {
					if (b.ideaSource === "editorial") return true;
					if (b.ideaSourceProId && publishedProIds.has(b.ideaSourceProId)) return true;
					return false;
				});
				const blogRows: BlogRow[] = visibleBlogs.map((b) => ({
					id: b.id,
					slug: b.slug,
					title: b.title,
					metaDescription: b.metaDescription,
					primaryKeyword: b.primaryKeyword,
					secondaryKeywords: b.secondaryKeywords ? JSON.stringify(b.secondaryKeywords) : null,
					categoryId: b.categoryId,
					cityId: b.cityId,
					featuredImageUrl: b.featuredImageUrl,
					readTimeMinutes: b.readTimeMinutes,
					categoryName: b.categoryId ? blogCategoryMap.get(b.categoryId) ?? null : null,
				}));
				docs = buildBlogDocuments(blogRows, {
					blogCategories: blogCategoryMap,
					cities: cityMap,
				});
			}

			// Build MiniSearch index and upload to R2
			const indexJson = SearchService.buildIndex(docs);
			await env.R2.put(`search-indexes/${type}s.json`, indexJson, {
				httpMetadata: { contentType: "application/json" },
			});

			stats.push(`${type}: ${docs.length} docs`);
		}

		// Invalidate cached SearchService instances via KV version bump
		if (env.KV_CACHE) {
			await env.KV_CACHE.put("search-index-version", Date.now().toString());
		}

		console.log(`[search-cron] Indexes rebuilt: ${stats.join(", ")}`);
	} catch (err) {
		// Best-effort cron job: log the failure and let the next run retry.
		console.error("[search-cron] Failed to rebuild search indexes:", err);
	}
}