{"feature_extractor_name":"face_identity_extractor","version":"v1","feature_extractor_id":"face_identity_extractor_v1","description":"Production-grade face recognition using state-of-the-art models (SCRFD + ArcFace). Detects faces, aligns them to a canonical template, and generates 512-dimensional embeddings with 99.8%+ verification accuracy (LFW benchmark). Supports images, videos, and PDFs.\n\n**Pipeline Steps:**\n1. Filter dataset to collection (if collection_id provided)\n2. **Content Type Routing:**\n   - **Images:** Direct to Step 3\n   - **Videos:** Frame extraction (sampling at video_sampling_fps) → Step 3\n   - **PDFs:** Page rendering → Step 3\n   - **Mixed:** Branch by type, process separately, then union\n3. Face detection using SCRFD\n   - Detect all faces per image/frame/page\n   - Extract 5-point facial landmarks (eyes, nose, mouth)\n   - Filter by min_face_size and detection_threshold\n4. 5-point affine face alignment\n   - Warp face to canonical 112x112 template\n   - Mandatory for consistent embeddings\n5. ArcFace embedding generation (512D, L2 normalized)\n   - arcface_r100 model\n   - Cosine similarity for matching\n6. **Conditional:** Quality scoring (if enable_quality_scoring=true)\n   - Assess blur, size, landmark confidence\n   - Filter by quality_threshold if specified\n7. **Conditional:** Video deduplication (if video_deduplication=true AND video content)\n   - Remove duplicate faces across frames\n   - Threshold-based similarity matching\n   - Track face timelines in video\n8. Output validation\n\n
**Output:** Per-face documents with embeddings, bbox, landmarks, and quality scores\n\n**Use for:** Employee verification, photo organization, face clustering, surveillance, identity systems.\n\n**Not for:** General image search (use image_extractor), object detection (use multimodal_extractor).","icon":"user-circle","source":"builtin","input_schema":{"description":"Input schema for face identity extractor.\n\nProvide exactly ONE of: image, video, video_frame, or pdf.","examples":[{"description":"Single portrait","image":"s3://photos/john-doe-portrait.jpg"},{"description":"Video segment","video":"s3://segments/interview-clip.mp4"},{"description":"PDF document with photo","pdf":"s3://documents/id-card.pdf"}],"properties":{"image":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Image URL or S3 path containing faces. Formats: JPEG, PNG, WebP, BMP. Resolution: 640px+ recommended.","examples":["s3://bucket/photos/portrait.jpg","https://example.com/photo.png"],"title":"Image"},"video":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Video URL or S3 path. Subject to max_video_length limit. Formats: MP4, MOV, AVI, MKV, WebM. Sampling controlled by video_sampling_fps.","examples":["s3://bucket/videos/interview-60sec.mp4","s3://bucket/segments/clip-5sec.mp4"],"title":"Video"},"video_frame":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Single video frame URL or S3 path. Treated as image.","examples":["s3://bucket/frames/frame_001.jpg"],"title":"Video Frame"},"pdf":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"PDF URL or S3 path containing faces in pages. Each page is converted to an image and processed for faces. 
Useful for ID documents, resumes, and forms with photos.","examples":["s3://bucket/documents/id-card.pdf","s3://bucket/resumes/profile-with-photo.pdf"],"title":"Pdf"}},"title":"FaceIdentityExtractorInput","type":"object"},"output_schema":{"additionalProperties":true,"description":"Output schema for face identity extractor.\n\nEach document represents ONE detected face with its aligned embedding.\n\nNote: document_id, collection_id, and source_object_id are added automatically\nby the system and should not be included in extractor output.","properties":{"face_identity_extractor_v1_embedding":{"description":"ArcFace face embedding (512-d L2-normalized vector). Use cosine similarity for face matching. A cosine similarity above roughly 0.25-0.30 indicates the same person.","items":{"type":"number"},"maxItems":512,"minItems":512,"title":"Face Identity Extractor V1 Embedding","type":"array"},"face_index":{"description":"Index of this face in source image (0-based)","title":"Face Index","type":"integer"},"bbox":{"additionalProperties":{"type":"number"},"description":"Face bounding box {x1, y1, x2, y2, width, height}","title":"Bbox","type":"object"},"detection_score":{"description":"SCRFD detection confidence (0.0-1.0)","title":"Detection Score","type":"number"},"landmarks":{"additionalProperties":{"items":{"type":"number"},"type":"array"},"description":"5 facial landmarks for alignment","title":"Landmarks","type":"object"},"quality_score":{"anyOf":[{"type":"number"},{"type":"null"}],"default":null,"description":"Face quality score (0.0-1.0)","title":"Quality Score"},"quality_components":{"anyOf":[{"additionalProperties":{"type":"number"},"type":"object"},{"type":"null"}],"default":null,"description":"Quality component scores","title":"Quality Components"},"aligned_face_crop":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Base64-encoded aligned 112×112 face crop","title":"Aligned Face 
Crop"},"source_frame_thumbnail":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Base64-encoded resized source frame thumbnail for display with bbox overlay","title":"Source Frame Thumbnail"},"source_frame_width":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Original source frame width in pixels","title":"Source Frame Width"},"source_frame_height":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Original source frame height in pixels","title":"Source Frame Height"},"frame_number":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Frame number in source video","title":"Frame Number"},"timestamp":{"anyOf":[{"type":"number"},{"type":"null"}],"default":null,"description":"Timestamp in source video (seconds)","title":"Timestamp"},"page_number":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Page number in source PDF (0-based)","title":"Page Number"},"embedding_model":{"description":"Embedding model used","title":"Embedding Model","type":"string"},"detection_model":{"description":"Detection model used","title":"Detection Model","type":"string"},"processing_time_ms":{"description":"Processing time (milliseconds)","title":"Processing Time Ms","type":"number"}},"required":["face_identity_extractor_v1_embedding","face_index","bbox","detection_score","landmarks","embedding_model","detection_model","processing_time_ms"],"title":"FaceIdentityExtractorOutput","type":"object"},"parameter_schema":{"description":"Parameters for the Face Identity Extractor.\n\nThe Face Identity Extractor processes images, video frames, or PDF pages to detect, align,\nand embed faces using production-grade SOTA models (SCRFD + ArcFace).\n\nCore Pipeline:\n1. SCRFD Detection → Bounding boxes + 5 landmarks\n2. 5-Point Affine Alignment → 112×112 canonical face\n3. ArcFace Embedding → 512-d L2-normalized vector\n4. 
Optional Quality Scoring → Filter low-quality faces\n\nUse Cases:\n    - Face verification (1:1 matching)\n    - Face identification (1:N search)\n    - Face clustering (group photos by person)\n    - Duplicate face detection","examples":[{"description":"Employee verification (high quality, 1:1 matching)","detection_model":"scrfd_2.5g","detection_threshold":0.7,"enable_quality_scoring":true,"extractor_type":"face_identity_extractor","max_faces_per_image":1,"min_face_size":40,"normalize_embeddings":true,"output_mode":"per_face","quality_threshold":0.5,"use_case":"Corporate access control, employee ID photos for badge matching"},{"description":"Photo library organization (multiple faces)","detection_model":"scrfd_2.5g","detection_threshold":0.6,"enable_quality_scoring":true,"extractor_type":"face_identity_extractor","max_faces_per_image":null,"min_face_size":30,"output_mode":"per_face","store_detection_metadata":true,"use_case":"Personal photo management: group photos by person"}],"properties":{"extractor_type":{"const":"face_identity_extractor","default":"face_identity_extractor","description":"Discriminator field for parameter type identification. Must be 'face_identity_extractor'.","title":"Extractor Type","type":"string"},"detection_model":{"default":"scrfd_2.5g","description":"SCRFD model for face detection. 'scrfd_500m': Fastest (2-3ms). 'scrfd_2.5g': Balanced (5-7ms), recommended. 'scrfd_10g': Highest accuracy (10-15ms).","enum":["scrfd_500m","scrfd_2.5g","scrfd_10g"],"title":"Detection Model","type":"string"},"min_face_size":{"default":20,"description":"Minimum face size in pixels to detect. 20px: Balanced. 40px: Higher quality. 
10px: Maximum recall.","maximum":200,"minimum":10,"title":"Min Face Size","type":"integer"},"detection_threshold":{"default":0.5,"description":"Confidence threshold for face detection (0.0-1.0).","maximum":1.0,"minimum":0.0,"title":"Detection Threshold","type":"number"},"max_faces_per_image":{"anyOf":[{"minimum":1,"type":"integer"},{"type":"null"}],"default":null,"description":"Maximum number of faces to process per image. null: Process all faces.","title":"Max Faces Per Image"},"normalize_embeddings":{"default":true,"description":"L2-normalize embeddings to unit vectors (recommended).","title":"Normalize Embeddings","type":"boolean"},"enable_quality_scoring":{"default":true,"description":"Compute quality scores (blur, size, landmarks). Adds ~5ms per face.","title":"Enable Quality Scoring","type":"boolean"},"quality_threshold":{"anyOf":[{"maximum":1.0,"minimum":0.0,"type":"number"},{"type":"null"}],"default":null,"description":"Minimum quality score to index faces. null: Index all faces. 0.5: Moderate filtering. 0.7: High quality only.","title":"Quality Threshold"},"max_video_length":{"default":60,"description":"Maximum video length in seconds. 60: Default. 10: Recommended for retrieval. 300: Maximum (extraction only).","maximum":300,"minimum":1,"title":"Max Video Length","type":"integer"},"video_sampling_fps":{"anyOf":[{"maximum":60.0,"minimum":0.1,"type":"number"},{"type":"null"}],"default":1.0,"description":"Frames per second to sample from video. 1.0: One frame per second (recommended).","title":"Video Sampling Fps"},"video_deduplication":{"default":true,"description":"Remove duplicate faces across video frames (extraction only). Reduces redundancy by 90-95%. NOT used in retrieval.","title":"Video Deduplication","type":"boolean"},"video_deduplication_threshold":{"default":0.8,"description":"Cosine similarity threshold for deduplication. 
0.8: Conservative (default).","maximum":1.0,"minimum":0.0,"title":"Video Deduplication Threshold","type":"number"},"output_mode":{"default":"per_face","description":"'per_face': One document per face (recommended). 'per_image': One doc per image with faces array.","enum":["per_face","per_image"],"title":"Output Mode","type":"string"},"include_face_crops":{"default":false,"description":"Include aligned 112×112 face crops as base64. Adds ~5KB per face.","title":"Include Face Crops","type":"boolean"},"include_source_frame_thumbnail":{"default":false,"description":"Include resized source frame/image as base64 thumbnail (~15-30KB per face). Used for display with bounding box overlay.","title":"Include Source Frame Thumbnail","type":"boolean"},"store_detection_metadata":{"default":true,"description":"Store bbox, landmarks, detection scores. Recommended for debugging.","title":"Store Detection Metadata","type":"boolean"}},"title":"FaceIdentityExtractorParams","type":"object"},"supported_input_types":["image","video","pdf"],"max_inputs":{"image":1,"video":1,"pdf":1},"default_parameters":{},"costs":{"tier":3,"tier_label":"COMPLEX","rates":[{"unit":"image","credits_per_unit":5,"description":"Base cost per image processed"},{"unit":"face","credits_per_unit":5,"description":"Additional cost per face detected"}]},"required_vector_indexes":[{"feature_uri":"mixpeek://face_identity_extractor@v1/insightface__arcface","name":"insightface__arcface","description":"Vector index for face identity embeddings (ArcFace 512-d)","type":"single","index":{"name":"face_identity_extractor_v1_embedding","description":"Dense vector embedding for face identity verification and search. 512-dimensional L2-normalized ArcFace embeddings. Use cosine similarity for face matching. 
Similarity threshold: 0.25-0.30 for same person verification.","dimensions":512,"type":"dense","distance":"Cosine","datatype":"float32","on_disk":null,"supported_inputs":["image"],"inference_name":"insightface__arcface","inference_service_id":"insightface/arcface","purpose":null,"vector_name_override":null,"supports_multi_query":false}}],"required_payload_indexes":[],"position_fields":["face_index","frame_number","page_number"]}