{"feature_extractor_name":"gemini_multifile_extractor","version":"v1","feature_extractor_id":"gemini_multifile_extractor_v1","description":"**Multi-file object embedding using Gemini Embedding 2** (gemini-embedding-2, 3072-d).\n\nEmbeds ALL files of an object (images, PDFs, video, audio, text) into a SINGLE unified vector in one Gemini API call. Use when you want object-level search, where similarity is based on the combined content of all of an object's blobs, rather than search over individual files.\n\n**Configure with array input_mappings:**\n```json\n{\"files\": [\"hero_image\", \"spec_sheet\", \"description\"]}\n```\n\n**When to use:**\n- Product catalogs: embed image + spec PDF + description together\n- Medical records: embed scan + report + notes together\n- Legal documents: embed contract + exhibits + summary together\n- E-commerce: embed product photo + manual + label together\n\n**Output:** One embedding per object (not per file); 3072-d by default, configurable via `output_dimensionality`.\n\n**Model:** gemini-embedding-2 (Gemini Embedding 2)","icon":"layers","category":"multimodal","source":"builtin","type_mode":"multimodal","expected_input_types":null,"inference_type":"embedding","input_schema":{"description":"Input schema for the Gemini Multifile Extractor.\n\nAccepts an array-valued input key whose values are URLs or text content\nfrom multiple blob fields. All items are embedded together in a single\nGemini API call, producing one embedding per object.\n\nConfigure via input_mappings:\n    {\"files\": [\"hero_image\", \"spec_sheet\", \"description\"]}","examples":[{"files":["s3://bucket/hero_image.jpg","s3://bucket/spec_sheet.pdf","Lightweight carbon-fiber trail running shoe"]}],"properties":{"files":{"description":"List of blob field values (URLs or text) to embed together. 
Populated automatically from array input_mappings — not set directly by users.","items":{"type":"string"},"title":"Files","type":"array"}},"required":["files"],"title":"GeminiMultifileExtractorInput","type":"object"},"output_schema":{"additionalProperties":true,"description":"Output schema for the Gemini Multifile Extractor.","properties":{"gemini_multifile_extractor_v1_embedding":{"description":"Gemini Embedding 2 vector (3072-d by default) for all input files combined.","items":{"type":"number"},"title":"Gemini Multifile Extractor V1 Embedding","type":"array"},"source_blob_count":{"description":"Number of blobs that were embedded together.","title":"Source Blob Count","type":"integer"},"source_blob_properties":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Blob property names that contributed to this embedding.","title":"Source Blob Properties"}},"required":["gemini_multifile_extractor_v1_embedding","source_blob_count"],"title":"GeminiMultifileExtractorOutput","type":"object"},"parameter_schema":{"description":"Parameters for the Gemini Multifile Extractor.\n\nUses Gemini Embedding 2 (gemini-embedding-2) to embed all files\nof an object into a single 3072-d vector in one API call. Supports images,\nvideo, audio, PDF, and text blobs.","examples":[{"extractor_type":"gemini_multifile_extractor","input_key":"files","output_dimensionality":3072,"task_type":"RETRIEVAL_DOCUMENT"}],"properties":{"extractor_type":{"const":"gemini_multifile_extractor","default":"gemini_multifile_extractor","description":"Discriminator field for parameter type identification.","title":"Extractor Type","type":"string"},"output_dimensionality":{"default":3072,"description":"Output embedding dimensions. Gemini Embedding 2 supports 3072 (default), 768, or 256 via truncation. 
Lower dimensions reduce storage cost at slight quality loss.","maximum":3072,"minimum":256,"title":"Output Dimensionality","type":"integer"},"task_type":{"default":"RETRIEVAL_DOCUMENT","description":"Embedding intent used as a text instruction for Gemini Embedding 2. Common values: RETRIEVAL_DOCUMENT, RETRIEVAL_QUERY, SEMANTIC_SIMILARITY, CLASSIFICATION.","title":"Task Type","type":"string"},"input_key":{"default":"files","description":"The input_mappings key whose value is the list of blob fields to embed together. Must match the key used in input_mappings (e.g., 'files'). Default: 'files'.","title":"Input Key","type":"string"}},"title":"GeminiMultifileExtractorParams","type":"object"},"supported_input_types":["image","video","audio","pdf","text","string"],"max_inputs":{"image":100,"video":100,"audio":100,"pdf":100,"text":100,"string":100},"default_parameters":{},"costs":{"tier":3,"tier_label":"ADVANCED","rates":[{"unit":"image","credits_per_unit":10,"description":"Base cost per object embedded (covers all files in the object)"}]},"required_vector_indexes":[{"feature_uri":"mixpeek://gemini_multifile_extractor@v1/gemini-embedding-2","name":"gemini-embedding-2","description":"Gemini Embedding 2 unified multimodal embedding (3072-d). All files of an object embedded into one vector for cross-modal object-level search.","type":"single","index":{"name":"gemini_multifile_extractor_v1_embedding","description":"Dense vector embedding for multi-file objects using Gemini Embedding 2. 
Supports images, video, audio, PDF, and text in a unified 3072-d space.","dimensions":3072,"type":"dense","distance":"Cosine","datatype":"float32","on_disk":null,"supported_inputs":["image","video","audio","pdf","text","string"],"inference_name":"google__gemini_embedding_2","inference_service_id":"google/gemini-embedding-2","purpose":null,"vector_name_override":null,"supports_multi_query":true}}],"required_payload_indexes":[],"position_fields":[],"capabilities":["batch","realtime"],"example_usage":{"namespace":{"feature_extractors":[{"name":"gemini_multifile_extractor","version":"v1"}]},"collection":{"feature_extractor":{"name":"gemini_multifile_extractor","version":"v1","input_mappings":{"files":["<your_file_field_1>","<your_file_field_2>"]},"parameters":{"output_dimensionality":3072,"task_type":"RETRIEVAL_DOCUMENT","input_key":"files"}}}}}