{"feature_extractor_name":"universal_extractor","version":"v1","feature_extractor_id":"universal_extractor_v1","description":"**Universal all-in-one extractor** — handles image, video, audio, and documents in a single extractor using Gemini APIs.\n\nAutomatically detects modality and applies the best processing strategy:\n- **Image**: Embedding + description + OCR\n- **Video**: Segment into clips → embedding + transcription + scene description per segment\n- **Audio**: Transcription → embedding per segment\n- **Document**: Page-level embedding + text extraction + OCR\n\nAll embeddings use Gemini Embedding 2 (3072-d) in a unified multimodal vector space, enabling cross-modal search (e.g., text query finds relevant video segments).\n\n**Runs on Celery (no Ray startup delay)** — optimized for the Studio upload flow where files need to be searchable in seconds, not minutes.\n\n**Model:** gemini-embedding-2 (Gemini Embedding 2, 3072-d)","icon":"zap","category":"multimodal","source":"builtin","type_mode":"multimodal","expected_input_types":null,"inference_type":"embedding","input_schema":{"description":"Input: a single blob field (image, video, audio, or document).","examples":[{"content":"s3://bucket/product_photo.jpg"},{"content":"s3://bucket/demo_video.mp4"}],"properties":{"content":{"description":"URL or path to the file to process. Populated from input_mappings.","title":"Content","type":"string"}},"required":["content"],"title":"UniversalExtractorInput","type":"object"},"output_schema":{"additionalProperties":true,"description":"Output: embedding + extracted text + metadata.","properties":{"universal_extractor_v1_embedding":{"description":"Gemini Embedding 2 vector (3072-d) for the content.","items":{"type":"number"},"title":"Universal Extractor V1 Embedding","type":"array"},"modality":{"description":"Detected modality: image, video, audio, or document.","title":"Modality","type":"string"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Extracted text (OCR, transcription, or document text).","title":"Text"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"AI-generated description of the content.","title":"Description"},"segment_index":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Segment index (for chunked content like video/audio/documents).","title":"Segment Index"},"segment_total":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Total segments for this source object.","title":"Segment Total"},"page_number":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Page number (documents only).","title":"Page Number"},"start_time_s":{"anyOf":[{"type":"number"},{"type":"null"}],"default":null,"description":"Segment start time in seconds (video/audio only).","title":"Start Time S"},"end_time_s":{"anyOf":[{"type":"number"},{"type":"null"}],"default":null,"description":"Segment end time in seconds (video/audio only).","title":"End Time S"},"duration_s":{"anyOf":[{"type":"number"},{"type":"null"}],"default":null,"description":"Total file duration in seconds (video/audio only).","title":"Duration S"}},"required":["universal_extractor_v1_embedding","modality"],"title":"UniversalExtractorOutput","type":"object"},"parameter_schema":{"description":"Parameters for the Universal Extractor.","examples":[{"extract_text":true,"extractor_type":"universal_extractor","generate_description":true,"max_concurrency":4,"max_file_download_mb":500,"output_dimensionality":3072,"task_type":"RETRIEVAL_DOCUMENT"}],"properties":{"extractor_type":{"const":"universal_extractor","default":"universal_extractor","description":"Discriminator field for parameter type identification.","title":"Extractor Type","type":"string"},"output_dimensionality":{"default":3072,"description":"Output embedding dimensions (Gemini Embedding 2 supports 256-3072).","maximum":3072,"minimum":256,"title":"Output Dimensionality","type":"integer"},"task_type":{"default":"RETRIEVAL_DOCUMENT","description":"Embedding intent used as a text instruction for Gemini Embedding 2. Common values: RETRIEVAL_DOCUMENT, RETRIEVAL_QUERY, SEMANTIC_SIMILARITY.","title":"Task Type","type":"string"},"generate_description":{"default":true,"description":"Generate a text description of the content via Gemini vision/understanding.","title":"Generate Description","type":"boolean"},"extract_text":{"default":true,"description":"Extract text content (OCR for images/docs, transcription for audio/video).","title":"Extract Text","type":"boolean"},"max_video_segments":{"default":10,"description":"Maximum number of 30s segments to process for video files.","maximum":50,"minimum":1,"title":"Max Video Segments","type":"integer"},"max_document_pages":{"default":50,"description":"Maximum number of pages to process for document files.","maximum":200,"minimum":1,"title":"Max Document Pages","type":"integer"},"max_file_download_mb":{"default":500,"description":"Maximum file download size in MB for Celery fast-path processing.","maximum":1024,"minimum":1,"title":"Max File Download Mb","type":"integer"},"max_concurrency":{"default":4,"description":"Maximum per-task object concurrency for Celery fast-path processing.","maximum":32,"minimum":1,"title":"Max Concurrency","type":"integer"}},"title":"UniversalExtractorParams","type":"object"},"supported_input_types":["image","video","audio","pdf","text","string"],"max_inputs":{"image":1,"video":1,"audio":1,"pdf":1,"text":1,"string":1},"default_parameters":{},"costs":{"tier":2,"tier_label":"STANDARD","rates":[{"unit":"image","credits_per_unit":15,"description":"Per object processed (covers all Gemini API calls: embedding, description, OCR/transcription)"}]},"required_vector_indexes":[{"feature_uri":"mixpeek://universal_extractor@v1/gemini-embedding-2","name":"gemini-embedding-2","description":"Gemini Embedding 2 multimodal embedding (3072-d). Supports image, video, audio, and document content in a unified vector space.","type":"single","index":{"name":"universal_extractor_v1_embedding","description":"Dense vector embedding using Gemini Embedding 2. All modalities (image, video, audio, document) map to the same 3072-d space for cross-modal search.","dimensions":3072,"type":"dense","distance":"Cosine","datatype":"float32","on_disk":null,"supported_inputs":["image","video","audio","pdf","text","string"],"inference_name":"google__gemini_embedding_2","inference_service_id":"google/gemini-embedding-2","purpose":null,"vector_name_override":null,"supports_multi_query":true}}],"required_payload_indexes":[],"position_fields":["segment_index"],"capabilities":["batch","realtime"],"example_usage":{"namespace":{"feature_extractors":[{"name":"universal_extractor","version":"v1"}]},"collection":{"feature_extractor":{"name":"universal_extractor","version":"v1","input_mappings":{"content":"<your_content_field>"},"parameters":{"output_dimensionality":3072,"task_type":"RETRIEVAL_DOCUMENT","generate_description":true,"extract_text":true,"max_video_segments":10,"max_document_pages":50,"max_file_download_mb":500,"max_concurrency":4}}}}}