aws.sagemaker.EndpointConfiguration
Provides a SageMaker endpoint configuration resource.
Example Usage
Basic usage (the examples below reference an existing SageMaker model resource, m, defined elsewhere in the program):
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const ec = new aws.sagemaker.EndpointConfiguration("ec", {
    name: "my-endpoint-config",
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name,
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    tags: {
        Name: "foo",
    },
});
import pulumi
import pulumi_aws as aws
ec = aws.sagemaker.EndpointConfiguration("ec",
    name="my-endpoint-config",
    production_variants=[{
        "variant_name": "variant-1",
        "model_name": m["name"],
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }],
    tags={
        "Name": "foo",
    })
package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := sagemaker.NewEndpointConfiguration(ctx, "ec", &sagemaker.EndpointConfigurationArgs{
			Name: pulumi.String("my-endpoint-config"),
			ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
				&sagemaker.EndpointConfigurationProductionVariantArgs{
					VariantName:          pulumi.String("variant-1"),
					ModelName:            pulumi.Any(m.Name),
					InitialInstanceCount: pulumi.Int(1),
					InstanceType:         pulumi.String("ml.t2.medium"),
				},
			},
			Tags: pulumi.StringMap{
				"Name": pulumi.String("foo"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var ec = new Aws.Sagemaker.EndpointConfiguration("ec", new()
    {
        Name = "my-endpoint-config",
        ProductionVariants = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
            {
                VariantName = "variant-1",
                ModelName = m.Name,
                InitialInstanceCount = 1,
                InstanceType = "ml.t2.medium",
            },
        },
        Tags = 
        {
            { "Name", "foo" },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.EndpointConfiguration;
import com.pulumi.aws.sagemaker.EndpointConfigurationArgs;
import com.pulumi.aws.sagemaker.inputs.EndpointConfigurationProductionVariantArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var ec = new EndpointConfiguration("ec", EndpointConfigurationArgs.builder()
            .name("my-endpoint-config")
            .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
                .variantName("variant-1")
                .modelName(m.name())
                .initialInstanceCount(1)
                .instanceType("ml.t2.medium")
                .build())
            .tags(Map.of("Name", "foo"))
            .build());
    }
}
resources:
  ec:
    type: aws:sagemaker:EndpointConfiguration
    properties:
      name: my-endpoint-config
      productionVariants:
        - variantName: variant-1
          modelName: ${m.name}
          initialInstanceCount: 1
          instanceType: ml.t2.medium
      tags:
        Name: foo
Create EndpointConfiguration Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new EndpointConfiguration(name: string, args: EndpointConfigurationArgs, opts?: CustomResourceOptions);
@overload
def EndpointConfiguration(resource_name: str,
                          args: EndpointConfigurationArgs,
                          opts: Optional[ResourceOptions] = None)
@overload
def EndpointConfiguration(resource_name: str,
                          opts: Optional[ResourceOptions] = None,
                          production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
                          async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
                          data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
                          kms_key_arn: Optional[str] = None,
                          name: Optional[str] = None,
                          name_prefix: Optional[str] = None,
                          shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
                          tags: Optional[Mapping[str, str]] = None)
func NewEndpointConfiguration(ctx *Context, name string, args EndpointConfigurationArgs, opts ...ResourceOption) (*EndpointConfiguration, error)
public EndpointConfiguration(string name, EndpointConfigurationArgs args, CustomResourceOptions? opts = null)
public EndpointConfiguration(String name, EndpointConfigurationArgs args)
public EndpointConfiguration(String name, EndpointConfigurationArgs args, CustomResourceOptions options)
type: aws:sagemaker:EndpointConfiguration
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var endpointConfigurationResource = new Aws.Sagemaker.EndpointConfiguration("endpointConfigurationResource", new()
{
    ProductionVariants = new[]
    {
        new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
        {
            ModelName = "string",
            InitialVariantWeight = 0,
            ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantManagedInstanceScalingArgs
            {
                MaxInstanceCount = 0,
                MinInstanceCount = 0,
                Status = "string",
            },
            EnableSsmAccess = false,
            InferenceAmiVersion = "string",
            InitialInstanceCount = 0,
            AcceleratorType = "string",
            InstanceType = "string",
            CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantCoreDumpConfigArgs
            {
                DestinationS3Uri = "string",
                KmsKeyId = "string",
            },
            ModelDataDownloadTimeoutInSeconds = 0,
            ContainerStartupHealthCheckTimeoutInSeconds = 0,
            RoutingConfigs = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantRoutingConfigArgs
                {
                    RoutingStrategy = "string",
                },
            },
            ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantServerlessConfigArgs
            {
                MaxConcurrency = 0,
                MemorySizeInMb = 0,
                ProvisionedConcurrency = 0,
            },
            VariantName = "string",
            VolumeSizeInGb = 0,
        },
    },
    AsyncInferenceConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigArgs
    {
        OutputConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs
        {
            S3OutputPath = "string",
            KmsKeyId = "string",
            NotificationConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs
            {
                ErrorTopic = "string",
                IncludeInferenceResponseIns = new[]
                {
                    "string",
                },
                SuccessTopic = "string",
            },
            S3FailurePath = "string",
        },
        ClientConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigClientConfigArgs
        {
            MaxConcurrentInvocationsPerInstance = 0,
        },
    },
    DataCaptureConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigArgs
    {
        CaptureOptions = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureOptionArgs
            {
                CaptureMode = "string",
            },
        },
        DestinationS3Uri = "string",
        InitialSamplingPercentage = 0,
        CaptureContentTypeHeader = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs
        {
            CsvContentTypes = new[]
            {
                "string",
            },
            JsonContentTypes = new[]
            {
                "string",
            },
        },
        EnableCapture = false,
        KmsKeyId = "string",
    },
    KmsKeyArn = "string",
    Name = "string",
    NamePrefix = "string",
    ShadowProductionVariants = new[]
    {
        new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantArgs
        {
            ModelName = "string",
            InitialVariantWeight = 0,
            ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs
            {
                MaxInstanceCount = 0,
                MinInstanceCount = 0,
                Status = "string",
            },
            EnableSsmAccess = false,
            InferenceAmiVersion = "string",
            InitialInstanceCount = 0,
            AcceleratorType = "string",
            InstanceType = "string",
            CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs
            {
                DestinationS3Uri = "string",
                KmsKeyId = "string",
            },
            ModelDataDownloadTimeoutInSeconds = 0,
            ContainerStartupHealthCheckTimeoutInSeconds = 0,
            RoutingConfigs = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantRoutingConfigArgs
                {
                    RoutingStrategy = "string",
                },
            },
            ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantServerlessConfigArgs
            {
                MaxConcurrency = 0,
                MemorySizeInMb = 0,
                ProvisionedConcurrency = 0,
            },
            VariantName = "string",
            VolumeSizeInGb = 0,
        },
    },
    Tags = 
    {
        { "string", "string" },
    },
});
example, err := sagemaker.NewEndpointConfiguration(ctx, "endpointConfigurationResource", &sagemaker.EndpointConfigurationArgs{
	ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
		&sagemaker.EndpointConfigurationProductionVariantArgs{
			ModelName:            pulumi.String("string"),
			InitialVariantWeight: pulumi.Float64(0),
			ManagedInstanceScaling: &sagemaker.EndpointConfigurationProductionVariantManagedInstanceScalingArgs{
				MaxInstanceCount: pulumi.Int(0),
				MinInstanceCount: pulumi.Int(0),
				Status:           pulumi.String("string"),
			},
			EnableSsmAccess:      pulumi.Bool(false),
			InferenceAmiVersion:  pulumi.String("string"),
			InitialInstanceCount: pulumi.Int(0),
			AcceleratorType:      pulumi.String("string"),
			InstanceType:         pulumi.String("string"),
			CoreDumpConfig: &sagemaker.EndpointConfigurationProductionVariantCoreDumpConfigArgs{
				DestinationS3Uri: pulumi.String("string"),
				KmsKeyId:         pulumi.String("string"),
			},
			ModelDataDownloadTimeoutInSeconds:           pulumi.Int(0),
			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
			RoutingConfigs: sagemaker.EndpointConfigurationProductionVariantRoutingConfigArray{
				&sagemaker.EndpointConfigurationProductionVariantRoutingConfigArgs{
					RoutingStrategy: pulumi.String("string"),
				},
			},
			ServerlessConfig: &sagemaker.EndpointConfigurationProductionVariantServerlessConfigArgs{
				MaxConcurrency:         pulumi.Int(0),
				MemorySizeInMb:         pulumi.Int(0),
				ProvisionedConcurrency: pulumi.Int(0),
			},
			VariantName:    pulumi.String("string"),
			VolumeSizeInGb: pulumi.Int(0),
		},
	},
	AsyncInferenceConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigArgs{
		OutputConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs{
			S3OutputPath: pulumi.String("string"),
			KmsKeyId:     pulumi.String("string"),
			NotificationConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs{
				ErrorTopic: pulumi.String("string"),
				IncludeInferenceResponseIns: pulumi.StringArray{
					pulumi.String("string"),
				},
				SuccessTopic: pulumi.String("string"),
			},
			S3FailurePath: pulumi.String("string"),
		},
		ClientConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigClientConfigArgs{
			MaxConcurrentInvocationsPerInstance: pulumi.Int(0),
		},
	},
	DataCaptureConfig: &sagemaker.EndpointConfigurationDataCaptureConfigArgs{
		CaptureOptions: sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArray{
			&sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArgs{
				CaptureMode: pulumi.String("string"),
			},
		},
		DestinationS3Uri:          pulumi.String("string"),
		InitialSamplingPercentage: pulumi.Int(0),
		CaptureContentTypeHeader: &sagemaker.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs{
			CsvContentTypes: pulumi.StringArray{
				pulumi.String("string"),
			},
			JsonContentTypes: pulumi.StringArray{
				pulumi.String("string"),
			},
		},
		EnableCapture: pulumi.Bool(false),
		KmsKeyId:      pulumi.String("string"),
	},
	KmsKeyArn:  pulumi.String("string"),
	Name:       pulumi.String("string"),
	NamePrefix: pulumi.String("string"),
	ShadowProductionVariants: sagemaker.EndpointConfigurationShadowProductionVariantArray{
		&sagemaker.EndpointConfigurationShadowProductionVariantArgs{
			ModelName:            pulumi.String("string"),
			InitialVariantWeight: pulumi.Float64(0),
			ManagedInstanceScaling: &sagemaker.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs{
				MaxInstanceCount: pulumi.Int(0),
				MinInstanceCount: pulumi.Int(0),
				Status:           pulumi.String("string"),
			},
			EnableSsmAccess:      pulumi.Bool(false),
			InferenceAmiVersion:  pulumi.String("string"),
			InitialInstanceCount: pulumi.Int(0),
			AcceleratorType:      pulumi.String("string"),
			InstanceType:         pulumi.String("string"),
			CoreDumpConfig: &sagemaker.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs{
				DestinationS3Uri: pulumi.String("string"),
				KmsKeyId:         pulumi.String("string"),
			},
			ModelDataDownloadTimeoutInSeconds:           pulumi.Int(0),
			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
			RoutingConfigs: sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArray{
				&sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArgs{
					RoutingStrategy: pulumi.String("string"),
				},
			},
			ServerlessConfig: &sagemaker.EndpointConfigurationShadowProductionVariantServerlessConfigArgs{
				MaxConcurrency:         pulumi.Int(0),
				MemorySizeInMb:         pulumi.Int(0),
				ProvisionedConcurrency: pulumi.Int(0),
			},
			VariantName:    pulumi.String("string"),
			VolumeSizeInGb: pulumi.Int(0),
		},
	},
	Tags: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
})
var endpointConfigurationResource = new EndpointConfiguration("endpointConfigurationResource", EndpointConfigurationArgs.builder()
    .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
        .modelName("string")
        .initialVariantWeight(0)
        .managedInstanceScaling(EndpointConfigurationProductionVariantManagedInstanceScalingArgs.builder()
            .maxInstanceCount(0)
            .minInstanceCount(0)
            .status("string")
            .build())
        .enableSsmAccess(false)
        .inferenceAmiVersion("string")
        .initialInstanceCount(0)
        .acceleratorType("string")
        .instanceType("string")
        .coreDumpConfig(EndpointConfigurationProductionVariantCoreDumpConfigArgs.builder()
            .destinationS3Uri("string")
            .kmsKeyId("string")
            .build())
        .modelDataDownloadTimeoutInSeconds(0)
        .containerStartupHealthCheckTimeoutInSeconds(0)
        .routingConfigs(EndpointConfigurationProductionVariantRoutingConfigArgs.builder()
            .routingStrategy("string")
            .build())
        .serverlessConfig(EndpointConfigurationProductionVariantServerlessConfigArgs.builder()
            .maxConcurrency(0)
            .memorySizeInMb(0)
            .provisionedConcurrency(0)
            .build())
        .variantName("string")
        .volumeSizeInGb(0)
        .build())
    .asyncInferenceConfig(EndpointConfigurationAsyncInferenceConfigArgs.builder()
        .outputConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigArgs.builder()
            .s3OutputPath("string")
            .kmsKeyId("string")
            .notificationConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs.builder()
                .errorTopic("string")
                .includeInferenceResponseIns("string")
                .successTopic("string")
                .build())
            .s3FailurePath("string")
            .build())
        .clientConfig(EndpointConfigurationAsyncInferenceConfigClientConfigArgs.builder()
            .maxConcurrentInvocationsPerInstance(0)
            .build())
        .build())
    .dataCaptureConfig(EndpointConfigurationDataCaptureConfigArgs.builder()
        .captureOptions(EndpointConfigurationDataCaptureConfigCaptureOptionArgs.builder()
            .captureMode("string")
            .build())
        .destinationS3Uri("string")
        .initialSamplingPercentage(0)
        .captureContentTypeHeader(EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs.builder()
            .csvContentTypes("string")
            .jsonContentTypes("string")
            .build())
        .enableCapture(false)
        .kmsKeyId("string")
        .build())
    .kmsKeyArn("string")
    .name("string")
    .namePrefix("string")
    .shadowProductionVariants(EndpointConfigurationShadowProductionVariantArgs.builder()
        .modelName("string")
        .initialVariantWeight(0)
        .managedInstanceScaling(EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs.builder()
            .maxInstanceCount(0)
            .minInstanceCount(0)
            .status("string")
            .build())
        .enableSsmAccess(false)
        .inferenceAmiVersion("string")
        .initialInstanceCount(0)
        .acceleratorType("string")
        .instanceType("string")
        .coreDumpConfig(EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs.builder()
            .destinationS3Uri("string")
            .kmsKeyId("string")
            .build())
        .modelDataDownloadTimeoutInSeconds(0)
        .containerStartupHealthCheckTimeoutInSeconds(0)
        .routingConfigs(EndpointConfigurationShadowProductionVariantRoutingConfigArgs.builder()
            .routingStrategy("string")
            .build())
        .serverlessConfig(EndpointConfigurationShadowProductionVariantServerlessConfigArgs.builder()
            .maxConcurrency(0)
            .memorySizeInMb(0)
            .provisionedConcurrency(0)
            .build())
        .variantName("string")
        .volumeSizeInGb(0)
        .build())
    .tags(Map.of("string", "string"))
    .build());
endpoint_configuration_resource = aws.sagemaker.EndpointConfiguration("endpointConfigurationResource",
    production_variants=[{
        "model_name": "string",
        "initial_variant_weight": 0,
        "managed_instance_scaling": {
            "max_instance_count": 0,
            "min_instance_count": 0,
            "status": "string",
        },
        "enable_ssm_access": False,
        "inference_ami_version": "string",
        "initial_instance_count": 0,
        "accelerator_type": "string",
        "instance_type": "string",
        "core_dump_config": {
            "destination_s3_uri": "string",
            "kms_key_id": "string",
        },
        "model_data_download_timeout_in_seconds": 0,
        "container_startup_health_check_timeout_in_seconds": 0,
        "routing_configs": [{
            "routing_strategy": "string",
        }],
        "serverless_config": {
            "max_concurrency": 0,
            "memory_size_in_mb": 0,
            "provisioned_concurrency": 0,
        },
        "variant_name": "string",
        "volume_size_in_gb": 0,
    }],
    async_inference_config={
        "output_config": {
            "s3_output_path": "string",
            "kms_key_id": "string",
            "notification_config": {
                "error_topic": "string",
                "include_inference_response_ins": ["string"],
                "success_topic": "string",
            },
            "s3_failure_path": "string",
        },
        "client_config": {
            "max_concurrent_invocations_per_instance": 0,
        },
    },
    data_capture_config={
        "capture_options": [{
            "capture_mode": "string",
        }],
        "destination_s3_uri": "string",
        "initial_sampling_percentage": 0,
        "capture_content_type_header": {
            "csv_content_types": ["string"],
            "json_content_types": ["string"],
        },
        "enable_capture": False,
        "kms_key_id": "string",
    },
    kms_key_arn="string",
    name="string",
    name_prefix="string",
    shadow_production_variants=[{
        "model_name": "string",
        "initial_variant_weight": 0,
        "managed_instance_scaling": {
            "max_instance_count": 0,
            "min_instance_count": 0,
            "status": "string",
        },
        "enable_ssm_access": False,
        "inference_ami_version": "string",
        "initial_instance_count": 0,
        "accelerator_type": "string",
        "instance_type": "string",
        "core_dump_config": {
            "destination_s3_uri": "string",
            "kms_key_id": "string",
        },
        "model_data_download_timeout_in_seconds": 0,
        "container_startup_health_check_timeout_in_seconds": 0,
        "routing_configs": [{
            "routing_strategy": "string",
        }],
        "serverless_config": {
            "max_concurrency": 0,
            "memory_size_in_mb": 0,
            "provisioned_concurrency": 0,
        },
        "variant_name": "string",
        "volume_size_in_gb": 0,
    }],
    tags={
        "string": "string",
    })
const endpointConfigurationResource = new aws.sagemaker.EndpointConfiguration("endpointConfigurationResource", {
    productionVariants: [{
        modelName: "string",
        initialVariantWeight: 0,
        managedInstanceScaling: {
            maxInstanceCount: 0,
            minInstanceCount: 0,
            status: "string",
        },
        enableSsmAccess: false,
        inferenceAmiVersion: "string",
        initialInstanceCount: 0,
        acceleratorType: "string",
        instanceType: "string",
        coreDumpConfig: {
            destinationS3Uri: "string",
            kmsKeyId: "string",
        },
        modelDataDownloadTimeoutInSeconds: 0,
        containerStartupHealthCheckTimeoutInSeconds: 0,
        routingConfigs: [{
            routingStrategy: "string",
        }],
        serverlessConfig: {
            maxConcurrency: 0,
            memorySizeInMb: 0,
            provisionedConcurrency: 0,
        },
        variantName: "string",
        volumeSizeInGb: 0,
    }],
    asyncInferenceConfig: {
        outputConfig: {
            s3OutputPath: "string",
            kmsKeyId: "string",
            notificationConfig: {
                errorTopic: "string",
                includeInferenceResponseIns: ["string"],
                successTopic: "string",
            },
            s3FailurePath: "string",
        },
        clientConfig: {
            maxConcurrentInvocationsPerInstance: 0,
        },
    },
    dataCaptureConfig: {
        captureOptions: [{
            captureMode: "string",
        }],
        destinationS3Uri: "string",
        initialSamplingPercentage: 0,
        captureContentTypeHeader: {
            csvContentTypes: ["string"],
            jsonContentTypes: ["string"],
        },
        enableCapture: false,
        kmsKeyId: "string",
    },
    kmsKeyArn: "string",
    name: "string",
    namePrefix: "string",
    shadowProductionVariants: [{
        modelName: "string",
        initialVariantWeight: 0,
        managedInstanceScaling: {
            maxInstanceCount: 0,
            minInstanceCount: 0,
            status: "string",
        },
        enableSsmAccess: false,
        inferenceAmiVersion: "string",
        initialInstanceCount: 0,
        acceleratorType: "string",
        instanceType: "string",
        coreDumpConfig: {
            destinationS3Uri: "string",
            kmsKeyId: "string",
        },
        modelDataDownloadTimeoutInSeconds: 0,
        containerStartupHealthCheckTimeoutInSeconds: 0,
        routingConfigs: [{
            routingStrategy: "string",
        }],
        serverlessConfig: {
            maxConcurrency: 0,
            memorySizeInMb: 0,
            provisionedConcurrency: 0,
        },
        variantName: "string",
        volumeSizeInGb: 0,
    }],
    tags: {
        string: "string",
    },
});
type: aws:sagemaker:EndpointConfiguration
properties:
    asyncInferenceConfig:
        clientConfig:
            maxConcurrentInvocationsPerInstance: 0
        outputConfig:
            kmsKeyId: string
            notificationConfig:
                errorTopic: string
                includeInferenceResponseIns:
                    - string
                successTopic: string
            s3FailurePath: string
            s3OutputPath: string
    dataCaptureConfig:
        captureContentTypeHeader:
            csvContentTypes:
                - string
            jsonContentTypes:
                - string
        captureOptions:
            - captureMode: string
        destinationS3Uri: string
        enableCapture: false
        initialSamplingPercentage: 0
        kmsKeyId: string
    kmsKeyArn: string
    name: string
    namePrefix: string
    productionVariants:
        - acceleratorType: string
          containerStartupHealthCheckTimeoutInSeconds: 0
          coreDumpConfig:
            destinationS3Uri: string
            kmsKeyId: string
          enableSsmAccess: false
          inferenceAmiVersion: string
          initialInstanceCount: 0
          initialVariantWeight: 0
          instanceType: string
          managedInstanceScaling:
            maxInstanceCount: 0
            minInstanceCount: 0
            status: string
          modelDataDownloadTimeoutInSeconds: 0
          modelName: string
          routingConfigs:
            - routingStrategy: string
          serverlessConfig:
            maxConcurrency: 0
            memorySizeInMb: 0
            provisionedConcurrency: 0
          variantName: string
          volumeSizeInGb: 0
    shadowProductionVariants:
        - acceleratorType: string
          containerStartupHealthCheckTimeoutInSeconds: 0
          coreDumpConfig:
            destinationS3Uri: string
            kmsKeyId: string
          enableSsmAccess: false
          inferenceAmiVersion: string
          initialInstanceCount: 0
          initialVariantWeight: 0
          instanceType: string
          managedInstanceScaling:
            maxInstanceCount: 0
            minInstanceCount: 0
            status: string
          modelDataDownloadTimeoutInSeconds: 0
          modelName: string
          routingConfigs:
            - routingStrategy: string
          serverlessConfig:
            maxConcurrency: 0
            memorySizeInMb: 0
            provisionedConcurrency: 0
          variantName: string
          volumeSizeInGb: 0
    tags:
        string: string
EndpointConfiguration Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
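For example, the same production variant can be passed either as a typed argument class or as a dictionary literal. This is a minimal sketch; the model name "my-model" is a placeholder for an existing SageMaker model, not a value from this page.
import pulumi_aws as aws
# Typed argument class form.
ec_typed = aws.sagemaker.EndpointConfiguration("ec-typed",
    production_variants=[aws.sagemaker.EndpointConfigurationProductionVariantArgs(
        variant_name="variant-1",
        model_name="my-model",
        initial_instance_count=1,
        instance_type="ml.t2.medium",
    )])
# Equivalent dictionary literal form.
ec_dict = aws.sagemaker.EndpointConfiguration("ec-dict",
    production_variants=[{
        "variant_name": "variant-1",
        "model_name": "my-model",
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }])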
The EndpointConfiguration resource accepts the following input properties:
- ProductionVariants List<EndpointConfigurationProductionVariant>
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
- Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfig
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below (a short sketch follows this property list).
- Tags Dictionary<string, string>
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- ProductionVariants []EndpointConfigurationProductionVariantArgs
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs
- Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags map[string]string
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- productionVariants List<EndpointConfigurationProductionVariant>
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
- Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String,String>
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- productionVariants EndpointConfigurationProductionVariant[]
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
- Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name string
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix string
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadowProductionVariants EndpointConfigurationShadowProductionVariant[]
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags {[key: string]: string}
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- production_variants Sequence[EndpointConfigurationProductionVariantArgs]
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- async_inference_config EndpointConfigurationAsyncInferenceConfigArgs
- Specifies configuration for how an endpoint performs asynchronous inference.
- data_capture_config EndpointConfigurationDataCaptureConfigArgs
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kms_key_arn str
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name str
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- name_prefix str
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Mapping[str, str]
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- productionVariants List<Property Map>
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- asyncInferenceConfig Property Map
- Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig Property Map
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadowProductionVariants List<Property Map>
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String>
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
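As a sketch of the shadow-variant constraint described above, one production variant can be paired with one shadow variant that receives mirrored production traffic. This is a minimal Python example; "prod-model" and "candidate-model" are placeholder names for existing SageMaker models, not values from this page.
import pulumi_aws as aws
# One production variant plus one shadow variant; the shadow variant receives a copy of
# the production traffic for the model named "candidate-model".
ec_shadow = aws.sagemaker.EndpointConfiguration("ec-shadow",
    production_variants=[{
        "variant_name": "production",
        "model_name": "prod-model",
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }],
    shadow_production_variants=[{
        "variant_name": "shadow",
        "model_name": "candidate-model",
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }])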
Outputs
All input properties are implicitly available as output properties. Additionally, the EndpointConfiguration resource produces the following output properties:
- Arn string
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- Id string
- The provider-assigned unique ID for this managed resource.
- TagsAll Dictionary<string, string>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
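For instance, outputs such as arn and name can be exported from a program. A minimal Python sketch, where "my-model" is a placeholder for an existing SageMaker model:
import pulumi
import pulumi_aws as aws
# Inputs such as "name" are echoed back as outputs, and "arn" is an additional output assigned by AWS.
ec = aws.sagemaker.EndpointConfiguration("ec",
    production_variants=[{
        "variant_name": "variant-1",
        "model_name": "my-model",
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }])
pulumi.export("endpointConfigArn", ec.arn)
pulumi.export("endpointConfigName", ec.name)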
Look up Existing EndpointConfiguration Resource
Get an existing EndpointConfiguration resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
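A minimal Python lookup sketch; the ID passed here (the endpoint configuration name) is a placeholder:
import pulumi
import pulumi_aws as aws
# Look up an endpoint configuration that already exists outside of this program's state.
existing = aws.sagemaker.EndpointConfiguration.get("existing-ec", "my-endpoint-config")
pulumi.export("existingEndpointConfigArn", existing.arn)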
public static get(name: string, id: Input<ID>, state?: EndpointConfigurationState, opts?: CustomResourceOptions): EndpointConfiguration
@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        arn: Optional[str] = None,
        async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
        data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
        kms_key_arn: Optional[str] = None,
        name: Optional[str] = None,
        name_prefix: Optional[str] = None,
        production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
        shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
        tags: Optional[Mapping[str, str]] = None,
        tags_all: Optional[Mapping[str, str]] = None) -> EndpointConfiguration
func GetEndpointConfiguration(ctx *Context, name string, id IDInput, state *EndpointConfigurationState, opts ...ResourceOption) (*EndpointConfiguration, error)
public static EndpointConfiguration Get(string name, Input<string> id, EndpointConfigurationState? state, CustomResourceOptions? opts = null)
public static EndpointConfiguration get(String name, Output<String> id, EndpointConfigurationState state, CustomResourceOptions options)
resources:
  _:
    type: aws:sagemaker:EndpointConfiguration
    get:
      id: ${id}
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Arn string
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
- Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfig
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ProductionVariants List<EndpointConfigurationProductionVariant>
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags Dictionary<string, string>
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- TagsAll Dictionary<string, string>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- Arn string
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs
- Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ProductionVariants []EndpointConfigurationProductionVariantArgs
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags map[string]string
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- TagsAll map[string]string
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn String
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
- Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- productionVariants List<EndpointConfigurationProductionVariant>
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String,String>
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<String,String>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn string
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
- Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name string
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix string
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- productionVariants EndpointConfigurationProductionVariant[]
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadowProductionVariants EndpointConfigurationShadowProductionVariant[]
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags {[key: string]: string}
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll {[key: string]: string}
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn str
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- async_inference_config EndpointConfigurationAsyncInferenceConfigArgs
- Specifies configuration for how an endpoint performs asynchronous inference.
- data_capture_config EndpointConfigurationDataCaptureConfigArgs
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kms_key_arn str
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name str
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- name_prefix str
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- production_variants Sequence[EndpointConfigurationProductionVariantArgs]
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Mapping[str, str]
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tags_all Mapping[str, str]
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn String
- The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- asyncInferenceConfig Property Map
- Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig Property Map
- Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String
- The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String
- Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- productionVariants List<Property Map>
- A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadowProductionVariants List<Property Map>
- Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String>
- A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<String>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Supporting Types
EndpointConfigurationAsyncInferenceConfig, EndpointConfigurationAsyncInferenceConfigArgs
- OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
- Specifies the configuration for asynchronous inference invocation outputs.
- ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
- Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
- OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
- Specifies the configuration for asynchronous inference invocation outputs.
- ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
- Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
- outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
- Specifies the configuration for asynchronous inference invocation outputs.
- clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
- Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
- outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
- Specifies the configuration for asynchronous inference invocation outputs.
- clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
- Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
- output_config EndpointConfigurationAsyncInferenceConfigOutputConfig
- Specifies the configuration for asynchronous inference invocation outputs.
- client_config EndpointConfigurationAsyncInferenceConfigClientConfig
- Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
- outputConfig Property Map
- Specifies the configuration for asynchronous inference invocation outputs.
- clientConfig Property Map
- Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
EndpointConfigurationAsyncInferenceConfigClientConfig, EndpointConfigurationAsyncInferenceConfigClientConfigArgs
- MaxConcurrentInvocationsPerInstance int
- The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
- MaxConcurrentInvocationsPerInstance int
- The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
- maxConcurrentInvocationsPerInstance Integer
- The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
- maxConcurrentInvocationsPerInstance number
- The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
- max_concurrent_invocations_per_instance int
- The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
- maxConcurrentInvocationsPerInstance Number
- The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
EndpointConfigurationAsyncInferenceConfigOutputConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigArgs              
- S3OutputPath string
- The Amazon S3 location to upload inference responses to.
- KmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
- Specifies the configuration for notifications of inference results for asynchronous inference.
- S3FailurePath string
- The Amazon S3 location to upload failure inference responses to.
- S3OutputPath string
- The Amazon S3 location to upload inference responses to.
- KmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
- Specifies the configuration for notifications of inference results for asynchronous inference.
- S3FailurePath string
- The Amazon S3 location to upload failure inference responses to.
- s3OutputPath String
- The Amazon S3 location to upload inference responses to.
- kmsKeyId String
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
- Specifies the configuration for notifications of inference results for asynchronous inference.
- s3FailurePath String
- The Amazon S3 location to upload failure inference responses to.
- s3OutputPath string
- The Amazon S3 location to upload inference responses to.
- kmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
- Specifies the configuration for notifications of inference results for asynchronous inference.
- s3FailurePath string
- The Amazon S3 location to upload failure inference responses to.
- s3_output_path str
- The Amazon S3 location to upload inference responses to.
- kms_key_id str
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notification_config EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
- Specifies the configuration for notifications of inference results for asynchronous inference.
- s3_failure_path str
- The Amazon S3 location to upload failure inference responses to.
- s3OutputPath String
- The Amazon S3 location to upload inference responses to.
- kmsKeyId String
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notificationConfig Property Map
- Specifies the configuration for notifications of inference results for asynchronous inference.
- s3FailurePath String
- The Amazon S3 location to upload failure inference responses to.
EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs                  
- ErrorTopic string
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- IncludeInferenceResponseIns List<string>
- The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- SuccessTopic string
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- ErrorTopic string
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- IncludeInferenceResponseIns []string
- The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- SuccessTopic string
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- errorTopic String
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- includeInferenceResponseIns List<String>
- The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- successTopic String
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- errorTopic string
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- includeInferenceResponseIns string[]
- The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- successTopic string
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- error_topic str
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- include_inference_response_ins Sequence[str]
- The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- success_topic str
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- errorTopic String
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- includeInferenceResponseIns List<String>
- The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- successTopic String
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
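Putting the asynchronous inference types above together, the following TypeScript sketch configures S3 output and failure locations, SNS notifications, and a per-instance concurrency cap. The bucket URIs and the model name my-model are placeholders; the SNS topics are created alongside the endpoint configuration.
import * as aws from "@pulumi/aws";

// Hypothetical sketch: notification topics for async inference results.
const successTopic = new aws.sns.Topic("async-success");
const errorTopic = new aws.sns.Topic("async-errors");

// "my-model" and the s3:// URIs are placeholders for resources you already manage.
const asyncEc = new aws.sagemaker.EndpointConfiguration("async-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-model",
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
    }],
    asyncInferenceConfig: {
        clientConfig: {
            // Cap concurrent requests from the SageMaker client to each container.
            maxConcurrentInvocationsPerInstance: 4,
        },
        outputConfig: {
            s3OutputPath: "s3://my-bucket/async/output/",
            s3FailurePath: "s3://my-bucket/async/failures/",
            notificationConfig: {
                successTopic: successTopic.arn,
                errorTopic: errorTopic.arn,
                includeInferenceResponseIns: ["SUCCESS_NOTIFICATION_TOPIC"],
            },
        },
    },
});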
EndpointConfigurationDataCaptureConfig, EndpointConfigurationDataCaptureConfigArgs          
- CaptureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>
- Specifies what data to capture. Fields are documented below.
- DestinationS3Uri string
- The URL of the S3 location where the captured data is stored.
- InitialSamplingPercentage int
- Portion of data to capture. Should be between 0 and 100.
- CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
- The content type headers to capture.
See capture_content_type_header below.
- EnableCapture bool
- Flag to enable data capture. Defaults to false.
- KmsKeyId string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- CaptureOptions []EndpointConfigurationDataCaptureConfigCaptureOption
- Specifies what data to capture. Fields are documented below.
- DestinationS3Uri string
- The URL of the S3 location where the captured data is stored.
- InitialSamplingPercentage int
- Portion of data to capture. Should be between 0 and 100.
- CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
- The content type headers to capture.
See capture_content_type_header below.
- EnableCapture bool
- Flag to enable data capture. Defaults to false.
- KmsKeyId string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- captureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>
- Specifies what data to capture. Fields are documented below.
- destinationS3Uri String
- The URL of the S3 location where the captured data is stored.
- initialSamplingPercentage Integer
- Portion of data to capture. Should be between 0 and 100.
- captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
- The content type headers to capture.
See capture_content_type_header below.
- enableCapture Boolean
- Flag to enable data capture. Defaults to false.
- kmsKeyId String
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- captureOptions EndpointConfigurationDataCaptureConfigCaptureOption[]
- Specifies what data to capture. Fields are documented below.
- destinationS3Uri string
- The URL of the S3 location where the captured data is stored.
- initialSamplingPercentage number
- Portion of data to capture. Should be between 0 and 100.
- captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
- The content type headers to capture.
See capture_content_type_header below.
- enableCapture boolean
- Flag to enable data capture. Defaults to false.
- kmsKeyId string
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- capture_options Sequence[EndpointConfigurationDataCaptureConfigCaptureOption]
- Specifies what data to capture. Fields are documented below.
- destination_s3_uri str
- The URL of the S3 location where the captured data is stored.
- initial_sampling_percentage int
- Portion of data to capture. Should be between 0 and 100.
- capture_content_type_header EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
- The content type headers to capture.
See capture_content_type_header below.
- enable_capture bool
- Flag to enable data capture. Defaults to false.
- kms_key_id str
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- captureOptions List<Property Map>
- Specifies what data to capture. Fields are documented below.
- destinationS3Uri String
- The URL of the S3 location where the captured data is stored.
- initialSamplingPercentage Number
- Portion of data to capture. Should be between 0 and 100.
- captureContentTypeHeader Property Map
- The content type headers to capture.
See capture_content_type_header below.
- enableCapture Boolean
- Flag to enable data capture. Defaults to false.
- kmsKeyId String
- Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader, EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs                  
- CsvContentTypes List<string>
- The CSV content type headers to capture.
One of csv_content_types or json_content_types is required.
- JsonContentTypes List<string>
- The JSON content type headers to capture.
One of json_content_types or csv_content_types is required.
- CsvContentTypes []string
- The CSV content type headers to capture.
One of csv_content_types or json_content_types is required.
- JsonContentTypes []string
- The JSON content type headers to capture.
One of json_content_types or csv_content_types is required.
- csvContentTypes List<String>
- The CSV content type headers to capture.
One of csv_content_types or json_content_types is required.
- jsonContentTypes List<String>
- The JSON content type headers to capture.
One of json_content_types or csv_content_types is required.
- csvContentTypes string[]
- The CSV content type headers to capture.
One of csv_content_types or json_content_types is required.
- jsonContentTypes string[]
- The JSON content type headers to capture.
One of json_content_types or csv_content_types is required.
- csv_content_types Sequence[str]
- The CSV content type headers to capture.
One of csv_content_types or json_content_types is required.
- json_content_types Sequence[str]
- The JSON content type headers to capture.
One of json_content_types or csv_content_types is required.
- csvContentTypes List<String>
- The CSV content type headers to capture.
One of csv_content_types or json_content_types is required.
- jsonContentTypes List<String>
- The JSON content type headers to capture.
One of json_content_types or csv_content_types is required.
EndpointConfigurationDataCaptureConfigCaptureOption, EndpointConfigurationDataCaptureConfigCaptureOptionArgs              
- CaptureMode string
- Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- CaptureMode string
- Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- captureMode String
- Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- captureMode string
- Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- capture_mode str
- Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- captureMode String
- Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
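As a worked example of the data capture types above, the following TypeScript sketch captures half of all request and response payloads to S3 and records JSON content type headers. The bucket URI and the model name my-model are placeholders.
import * as aws from "@pulumi/aws";

// Hypothetical sketch: capture 50% of request and response payloads.
// "my-model" and the s3:// URI are placeholders.
const captureEc = new aws.sagemaker.EndpointConfiguration("capture-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-model",
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
    }],
    dataCaptureConfig: {
        enableCapture: true,
        initialSamplingPercentage: 50,
        destinationS3Uri: "s3://my-bucket/datacapture/",
        captureOptions: [
            { captureMode: "Input" },
            { captureMode: "Output" },
        ],
        captureContentTypeHeader: {
            jsonContentTypes: ["application/json"],
        },
    },
});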
EndpointConfigurationProductionVariant, EndpointConfigurationProductionVariantArgs        
- ModelName string
- The name of the model to use.
- AcceleratorType string
- The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int
- Initial number of instances used for auto-scaling.
- InitialVariantWeight double
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string
- The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>
- Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- VariantName string
- The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- ModelName string
- The name of the model to use.
- AcceleratorType string
- The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int
- Initial number of instances used for auto-scaling.
- InitialVariantWeight float64
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string
- The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs []EndpointConfigurationProductionVariantRoutingConfig
- Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- VariantName string
- The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String
- The name of the model to use.
- acceleratorType String
- The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Integer
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Integer
- Initial number of instances used for auto-scaling.
- initialVariantWeight Double
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String
- The type of instance to start.
- managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Integer
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- variantName String
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Integer
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName string
- The name of the model to use.
- acceleratorType string
- The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds number
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess boolean
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion string
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount number
- Initial number of instances used for auto-scaling.
- initialVariantWeight number
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType string
- The type of instance to start.
- managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds number
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs EndpointConfigurationProductionVariantRoutingConfig[]
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- variantName string
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb number
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- model_name str
- The name of the model to use.
- accelerator_type str
- The size of the Elastic Inference (EI) instance to use for the production variant.
- container_startup_health_check_timeout_in_seconds int
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- core_dump_config EndpointConfigurationProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enable_ssm_access bool
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inference_ami_version str
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initial_instance_count int
- Initial number of instances used for auto-scaling.
- initial_variant_weight float
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instance_type str
- The type of instance to start.
- managed_instance_scaling EndpointConfigurationProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- model_data_download_timeout_in_seconds int
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routing_configs Sequence[EndpointConfigurationProductionVariantRoutingConfig]
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverless_config EndpointConfigurationProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- variant_name str
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volume_size_in_gb int
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String
- The name of the model to use.
- acceleratorType String
- The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Number
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig Property Map
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Number
- Initial number of instances used for auto-scaling.
- initialVariantWeight Number
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String
- The type of instance to start.
- managedInstanceScaling Property Map
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Number
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<Property Map>
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig Property Map
- Specifies configuration for how an endpoint performs serverless inference.
- variantName String
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Number
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
EndpointConfigurationProductionVariantCoreDumpConfig, EndpointConfigurationProductionVariantCoreDumpConfigArgs              
- DestinationS3Uri string
- The Amazon S3 bucket to send the core dump to.
- KmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- DestinationS3Uri string
- The Amazon S3 bucket to send the core dump to.
- KmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String
- The Amazon S3 bucket to send the core dump to.
- kmsKeyId String
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri string
- The Amazon S3 bucket to send the core dump to.
- kmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destination_s3_uri str
- The Amazon S3 bucket to send the core dump to.
- kms_key_id str
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String
- The Amazon S3 bucket to send the core dump to.
- kmsKeyId String
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
EndpointConfigurationProductionVariantManagedInstanceScaling, EndpointConfigurationProductionVariantManagedInstanceScalingArgs              
- MaxInstanceCount int
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- MaxInstanceCount int
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Integer
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Integer
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount number
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount number
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status string
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- max_instance_count int
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- min_instance_count int
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status str
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Number
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Number
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
EndpointConfigurationProductionVariantRoutingConfig, EndpointConfigurationProductionVariantRoutingConfigArgs            
- RoutingStrategy string
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- RoutingStrategy string
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy string
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routing_strategy str
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
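A hedged TypeScript sketch of the two scaling-related blocks above: managed instance scaling bounded between one and four instances, combined with least-outstanding-requests routing on an instance-backed variant. The model name my-model is a placeholder, and whether managed instance scaling applies to your endpoint depends on the instance types and features you use, so treat this as an illustration of the shape of the arguments rather than a recommended configuration.
import * as aws from "@pulumi/aws";

// Hypothetical sketch: managed instance scaling plus routing strategy.
// "my-model" is a placeholder model name.
const scaledEc = new aws.sagemaker.EndpointConfiguration("scaled-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-model",
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
        managedInstanceScaling: {
            status: "ENABLED",
            minInstanceCount: 1,
            maxInstanceCount: 4,
        },
        routingConfigs: [{
            routingStrategy: "LEAST_OUTSTANDING_REQUESTS",
        }],
    }],
});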
EndpointConfigurationProductionVariantServerlessConfig, EndpointConfigurationProductionVariantServerlessConfigArgs            
- MaxConcurrency int
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- MaxConcurrency int
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Integer
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Integer
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Integer
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency number
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb number
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency number
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- max_concurrency int
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memory_size_in_mb int
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisioned_concurrency int
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Number
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Number
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Number
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
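For comparison with the instance-backed variants above, this TypeScript sketch defines a serverless variant: no instance type or instance count is set, and capacity comes entirely from serverlessConfig. The model name my-model is a placeholder.
import * as aws from "@pulumi/aws";

// Hypothetical sketch: a serverless variant. Capacity is governed by serverlessConfig,
// so instanceType and initialInstanceCount are omitted. "my-model" is a placeholder.
const serverlessEc = new aws.sagemaker.EndpointConfiguration("serverless-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-model",
        serverlessConfig: {
            maxConcurrency: 20,
            memorySizeInMb: 2048,
            provisionedConcurrency: 5, // must not exceed maxConcurrency
        },
    }],
});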
EndpointConfigurationShadowProductionVariant, EndpointConfigurationShadowProductionVariantArgs          
- ModelName string
- The name of the model to use.
- AcceleratorType string
- The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int
- Initial number of instances used for auto-scaling.
- InitialVariantWeight double
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string
- The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>
- Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- VariantName string
- The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- ModelName string
- The name of the model to use.
- AcceleratorType string
- The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int
- Initial number of instances used for auto-scaling.
- InitialVariantWeight float64
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string
- The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs []EndpointConfigurationShadowProductionVariantRoutingConfig
- Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- VariantName string
- The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String
- The name of the model to use.
- acceleratorType String
- The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Integer
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Integer
- Initial number of instances used for auto-scaling.
- initialVariantWeight Double
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String
- The type of instance to start.
- managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Integer
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- variantName String
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Integer
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName string
- The name of the model to use.
- acceleratorType string
- The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds number
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess boolean
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion string
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount number
- Initial number of instances used for auto-scaling.
- initialVariantWeight number
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType string
- The type of instance to start.
- managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds number
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs EndpointConfigurationShadowProductionVariantRoutingConfig[]
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- variantName string
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb number
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- model_name str
- The name of the model to use.
- accelerator_type str
- The size of the Elastic Inference (EI) instance to use for the production variant.
- container_startup_health_check_timeout_in_seconds int
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- core_dump_config EndpointConfigurationShadowProductionVariantCoreDumpConfig
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enable_ssm_access bool
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inference_ami_version str
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initial_instance_count int
- Initial number of instances used for auto-scaling.
- initial_variant_weight float
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instance_type str
- The type of instance to start.
- managed_instance_scaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- model_data_download_timeout_in_seconds int
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routing_configs Sequence[EndpointConfigurationShadowProductionVariantRoutingConfig]
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverless_config EndpointConfigurationShadowProductionVariantServerlessConfig
- Specifies configuration for how an endpoint performs serverless inference.
- variant_name str
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volume_size_in_gb int
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String
- The name of the model to use.
- acceleratorType String
- The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Number
- The timeout value, in seconds, for your inference container to pass the health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig Property Map
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Number
- Initial number of instances used for auto-scaling.
- initialVariantWeight Number
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String
- The type of instance to start.
- managedInstanceScaling Property Map
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Number
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<Property Map>
- Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig Property Map
- Specifies configuration for how an endpoint performs serverless inference.
- variantName String
- The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Number
- The size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
EndpointConfigurationShadowProductionVariantCoreDumpConfig, EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs                
- DestinationS3Uri string
- The Amazon S3 bucket to send the core dump to.
- KmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- DestinationS3Uri string
- The Amazon S3 bucket to send the core dump to.
- KmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String
- The Amazon S3 bucket to send the core dump to.
- kmsKeyId String
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri string
- The Amazon S3 bucket to send the core dump to.
- kmsKeyId string
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destination_s3_uri str
- The Amazon S3 bucket to send the core dump to.
- kms_key_id str
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String
- The Amazon S3 bucket to send the core dump to.
- kmsKeyId String
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
EndpointConfigurationShadowProductionVariantManagedInstanceScaling, EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs                
- MaxInstanceCount int
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- MaxInstanceCount int
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Integer
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Integer
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount number
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount number
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status string
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- max_instance_count int
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- min_instance_count int
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status str
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Number
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Number
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String
- Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
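A minimal sketch of the fields above, enabling managed instance scaling on a shadow production variant (model names and scaling bounds are placeholder values):
import * as aws from "@pulumi/aws";

// Placeholder model names; min/max instance counts are illustrative.
const ec = new aws.sagemaker.EndpointConfiguration("ec-with-scaling", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-live-model",
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    shadowProductionVariants: [{
        variantName: "shadow-variant-1",
        modelName: "my-shadow-model",
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
        managedInstanceScaling: {
            status: "ENABLED",
            minInstanceCount: 1,
            maxInstanceCount: 2,
        },
    }],
});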
EndpointConfigurationShadowProductionVariantRoutingConfig, EndpointConfigurationShadowProductionVariantRoutingConfigArgs              
- RoutingStrategy string
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- RoutingStrategy string
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy string
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routing_strategy str
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String
- Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
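For example, a shadow production variant can opt into least-outstanding-requests routing. This is a sketch only; the model names are placeholders:
import * as aws from "@pulumi/aws";

// Placeholder model names.
const ec = new aws.sagemaker.EndpointConfiguration("ec-with-routing", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-live-model",
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    shadowProductionVariants: [{
        variantName: "shadow-variant-1",
        modelName: "my-shadow-model",
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
        routingConfigs: [{
            routingStrategy: "LEAST_OUTSTANDING_REQUESTS",
        }],
    }],
});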
EndpointConfigurationShadowProductionVariantServerlessConfig, EndpointConfigurationShadowProductionVariantServerlessConfigArgs              
- MaxConcurrency int
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- MaxConcurrency int
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Integer
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Integer
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Integer
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency number
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb number
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency number
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- max_concurrency int
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memory_size_in_mb int
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisioned_concurrency int
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Number
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Number
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Number
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
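The sketch below shows the shape of a serverless shadow production variant; a serverless variant sets serverless_config in place of an instance type and count. Model names and sizing values are placeholders:
import * as aws from "@pulumi/aws";

// Placeholder model names; concurrency and memory values are illustrative.
const ec = new aws.sagemaker.EndpointConfiguration("ec-serverless-shadow", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: "my-live-model",
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    shadowProductionVariants: [{
        variantName: "shadow-variant-1",
        modelName: "my-shadow-model",
        serverlessConfig: {
            maxConcurrency: 5,
            memorySizeInMb: 2048,
            provisionedConcurrency: 1,
        },
    }],
});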
Import
Using pulumi import, import endpoint configurations using the name. For example:
$ pulumi import aws:sagemaker/endpointConfiguration:EndpointConfiguration test_endpoint_config endpoint-config-foo
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- AWS Classic pulumi/pulumi-aws
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the aws Terraform Provider.