Some months ago a customer asked me if there was a way to deploy a Windows node pool with spot virtual machines and ephemeral disks in Azure Kubernetes Service (AKS).

The idea was to create a cluster that could be used to run Windows batch workloads and minimize costs by deploying the following:

  • An AKS cluster with two Linux nodes and ephemeral disks as the default node pool configuration.
  • A Windows node pool with Spot Virtual Machines, ephemeral disks and auto-scaling enabled.
  • The Windows node pool's minimum count and initial number of nodes set to 0.

To create a cluster with the desired configuration using Terraform, follow the steps below:

Define the terraform providers to use

Create a providers.tf file with the following contents:

 1terraform {
 2  required_version = "> 0.12"
 3  required_providers {
 4    azurerm = {
 5      source  = "azurerm"
 6      version = "~> 2.26"
 7    }
 8  }
 9}
10
11provider "azurerm" {
12  features {}
13}

Define the variables

Create a variables.tf file with the following contents:

 1variable "resource_group_name" {
 2  default = "aks-win"
 3}
 4
 5variable "location" {
 6  default = "West Europe"
 7}
 8
 9variable "cluster_name" {
10  default = "aks-win"
11}
12
13variable "dns_prefix" {
14  default = "aks-win"
15}

Define the resource group

Create a main.tf file with the following contents:

# Resource group that holds every resource created by this configuration.
resource "azurerm_resource_group" "rg" {
  name     = var.resource_group_name
  location = var.location
}

Define the VNET for the cluster

Create a vnet-server.tf file with the following contents:

# Virtual network hosting the AKS node pools.
resource "azurerm_virtual_network" "vnet" {
  name                = "aks-vnet"
  location            = azurerm_resource_group.rg.location
  resource_group_name = azurerm_resource_group.rg.name
  address_space       = ["10.0.0.0/16"]
}

# Subnet the node pools attach to (referenced via vnet_subnet_id below).
resource "azurerm_subnet" "aks-subnet" {
  name                 = "aks-subnet"
  resource_group_name  = azurerm_resource_group.rg.name
  virtual_network_name = azurerm_virtual_network.vnet.name
  address_prefixes     = ["10.0.1.0/24"]
}

Define the AKS cluster

Create a aks-server.tf file with the following contents:

 1# Deploy Kubernetes
 2resource "azurerm_kubernetes_cluster" "k8s" {
 3  name                = var.cluster_name
 4  location            = azurerm_resource_group.rg.location
 5  resource_group_name = azurerm_resource_group.rg.name
 6  dns_prefix          = var.dns_prefix
 7
 8  default_node_pool {
 9    name                = "default"
10    node_count          = 2
11    vm_size             = "Standard_D2s_v3"
12    os_disk_size_gb     = 30
13    os_disk_type        = "Ephemeral"
14    vnet_subnet_id      = azurerm_subnet.aks-subnet.id
15    max_pods            = 15
16    enable_auto_scaling = false
17  }
18
19  # Using Managed Identity
20  identity {
21    type = "SystemAssigned"
22  }
23
24  network_profile {
25    # The --service-cidr is used to assign internal services in the AKS cluster an IP address. This IP address range should be an address space that isn't in use elsewhere in your network environment, including any on-premises network ranges if you connect, or plan to connect, your Azure virtual networks using Express Route or a Site-to-Site VPN connection.
26    service_cidr = "172.0.0.0/16"
27    # The --dns-service-ip address should be the .10 address of your service IP address range.
28    dns_service_ip = "172.0.0.10"
29    # The --docker-bridge-address lets the AKS nodes communicate with the underlying management platform. This IP address must not be within the virtual network IP address range of your cluster, and shouldn't overlap with other address ranges in use on your network.
30    docker_bridge_cidr = "172.17.0.1/16"
31    network_plugin     = "azure"
32    network_policy     = "calico"
33  }
34
35  role_based_access_control {
36    enabled = true
37  }
38
39  addon_profile {
40    kube_dashboard {
41      enabled = false
42    }
43  }
44}
45
# Windows node pool on Spot VMs; autoscales from 0 to 3 nodes on demand.
resource "azurerm_kubernetes_cluster_node_pool" "windows" {
  kubernetes_cluster_id = azurerm_kubernetes_cluster.k8s.id
  name                  = "win" # Windows pool names are limited to 6 characters.
  priority              = "Spot"
  eviction_policy       = "Delete"
  spot_max_price        = -1 # Pay up to the on-demand price; VMs are never evicted for pricing reasons.
  os_type               = "Windows"
  # "The virtual machine size Standard_D2s_v3 has a cache size of 53687091200
  # bytes, but the OS disk requires 137438953472 bytes. Use a VM size with
  # larger cache or disable ephemeral OS."
  # https://docs.microsoft.com/en-us/azure/virtual-machines/ephemeral-os-disks#size-requirements
  vm_size      = "Standard_DS3_v2"
  os_disk_type = "Ephemeral"
  # AKS taints spot nodes automatically; declaring the taint explicitly keeps
  # the Terraform state in sync and avoids spurious diffs on later plans.
  node_taints = [
    "kubernetes.azure.com/scalesetpriority=spot:NoSchedule"
  ]
  node_count          = 0
  enable_auto_scaling = true
  max_count           = 3
  min_count           = 0
}
62
# The auto-generated resource group (MC_*) that holds the cluster's nodes.
data "azurerm_resource_group" "node_resource_group" {
  name = azurerm_kubernetes_cluster.k8s.node_resource_group
}

# Let the kubelet identity manage resources in the node resource group.
# NOTE(review): "Virtual Machine Contributor" may be sufficient here -
# consider narrowing this role for least privilege; verify before changing.
resource "azurerm_role_assignment" "kubelet_contributor" {
  scope                = data.azurerm_resource_group.node_resource_group.id
  role_definition_name = "Contributor"
  principal_id         = azurerm_kubernetes_cluster.k8s.kubelet_identity[0].object_id
}

# Let the cluster's managed identity join node pools to the existing VNet.
resource "azurerm_role_assignment" "kubelet_network_contributor" {
  scope                = azurerm_virtual_network.vnet.id
  role_definition_name = "Network Contributor"
  principal_id         = azurerm_kubernetes_cluster.k8s.identity[0].principal_id
}

Deploy the AKS cluster

Run:

1terraform init
2terraform apply

Get the credentials for the cluster:

1RESOURCE_GROUP="aks-win"
2CLUSTER_NAME="aks-win"
3az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME

To verify that there are no windows VMs running, execute:

1kubectl get nodes

you should see something like:

1NAME                              STATUS   ROLES   AGE   VERSION   INTERNAL-IP   EXTERNAL-IP   OS-IMAGE             KERNEL-VERSION     CONTAINER-RUNTIME
2aks-default-36675761-vmss000000   Ready    agent   80m   v1.20.9   10.0.1.4      <none>        Ubuntu 18.04.5 LTS   5.4.0-1056-azure   containerd://1.4.8+azure
3aks-default-36675761-vmss000001   Ready    agent   80m   v1.20.9   10.0.1.20     <none>        Ubuntu 18.04.5 LTS   5.4.0-1056-azure   containerd://1.4.8+azure    

Deploy a Windows workload:

To deploy a Windows workload, create a windows_deployment.yaml file with the following contents:

 1apiVersion: apps/v1
 2kind: Deployment
 3metadata:
 4  name: servercore
 5  labels:
 6    app: servercore
 7spec:
 8  replicas: 1
 9  template:
10    metadata:
11      name: servercore
12      labels:
13        app: servercore
14    spec:
15      nodeSelector:
16        "kubernetes.azure.com/scalesetpriority": "spot"
17      containers:
18      - name: servercore
19        image: mcr.microsoft.com/dotnet/framework/samples:aspnetapp
20        resources:
21          limits:
22            cpu: 1
23            memory: 800M
24          requests:
25            cpu: .1
26            memory: 150M
27        ports:
28          - containerPort: 80
29      tolerations:
30        - key: "kubernetes.azure.com/scalesetpriority"
31          operator: "Equal"
32          value: "spot"
33          effect: "NoSchedule"
34  selector:
35    matchLabels:
36      app: servercore

and deploy it to your cluster:

1kubectl apply -f windows_deployment.yaml

Note the following:

  • The kubernetes.azure.com/scalesetpriority nodeSelector ensures the workload is scheduled only on nodes from the spot pool.
  • The toleration is required because AKS taints spot nodes with kubernetes.azure.com/scalesetpriority=spot:NoSchedule; without it the pod would never be scheduled there.
  • Deployment will take a while (> 5 minutes) since the Windows pool must scale up from zero to fulfill the request.

Now check the nodes again:

1kubectl get nodes

this time you should see something like:

1NAME                              STATUS   ROLES   AGE    VERSION   INTERNAL-IP   EXTERNAL-IP   OS-IMAGE                         KERNEL-VERSION     CONTAINER-RUNTIME
2aks-default-36675761-vmss000000   Ready    agent   91m    v1.20.9   10.0.1.4      <none>        Ubuntu 18.04.5 LTS               5.4.0-1056-azure   containerd://1.4.8+azure
3aks-default-36675761-vmss000001   Ready    agent   91m    v1.20.9   10.0.1.20     <none>        Ubuntu 18.04.5 LTS               5.4.0-1056-azure   containerd://1.4.8+azure
4akswin000000                      Ready    agent   102s   v1.20.9   10.0.1.36     <none>        Windows Server 2019 Datacenter   10.0.17763.2114    docker://20.10.6   

If you check the pod events you’ll find that the workload triggered a scale up:

1kubectl describe $(kubectl get po -l "app=servercore" -o name)   

I’ll let you test what happens if you delete the deployment.

Hope it helps!!!

Please find the complete terraform configuration here