sglang_v0.5.2/flashinfer_0.3.1/Jenkinsfile

308 lines
11 KiB
Groovy

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Jenkins pipeline
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
// Docker env used for testing
// Different image may have different version tag
// because some of them are more stable than others.
//
// Docker images are maintained by PMC, cached in dockerhub
// and remains relatively stable over the time.
// Flow for upgrading docker env (needs committer)
//
// - Send PR to upgrade build script in the repo
// - Build the new docker image
// - Tag the docker image with a new version and push to a binary cache.
// - Update the version in the Jenkinsfile, send a PR
// - Fix any issues wrt to the new image version in the PR
// - Merge the PR and now we are in new version
// - Tag the new version as the latest
// - Periodically cleanup the old versions on local workers
//
import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
// These are set at runtime from data in ci/jenkins/docker-images.yml, update
// image tags in that file
// Now supports multiple CUDA versions
docker_run_cu126 = "bash ci/bash.sh flashinfer/flashinfer-ci-cu126:latest"
docker_run_cu128 = "bash ci/bash.sh flashinfer/flashinfer-ci-cu128:latest"
docker_run_cu129 = "bash ci/bash.sh flashinfer/flashinfer-ci-cu129:latest"
docker_run_cu130 = "bash ci/bash.sh flashinfer/flashinfer-ci-cu130:latest"
// Build a per-executor workspace path so concurrent executors on the
// same agent never share a checkout directory.
def per_exec_ws(folder) {
    return "workspace/exec_${env.EXECUTOR_NUMBER}/${folder}"
}
// Stash the comma-separated file list `libs` under stash id `name`.
// md5 checksums are echoed first so a later unpack_lib on another node
// can be verified against this console log.
def pack_lib(name, libs) {
    def show_checksums = """
echo "Packing ${libs} into ${name}"
echo ${libs} | sed -e 's/,/ /g' | xargs md5sum
"""
    sh(show_checksums)
    stash(includes: libs, name: name)
}
// Restore a stash produced by pack_lib, then echo md5 checksums so the
// transfer can be compared against the packing node's log.
def unpack_lib(name, libs) {
    unstash(name)
    def show_checksums = """
echo "Unpacked ${libs} from ${name}"
echo ${libs} | sed -e 's/,/ /g' | xargs md5sum
"""
    sh(show_checksums)
}
// Abort any older in-flight build of the same branch via the milestone
// step. Builds on `main` are never cancelled.
def cancel_previous_build() {
    if (env.BRANCH_NAME == 'main') {
        return
    }
    int build_number = env.BUILD_NUMBER as int
    // Passing an older milestone number invalidates builds still stuck
    // before it; then claim this build's own milestone.
    if (build_number > 1) {
        milestone(build_number - 1)
    }
    milestone(build_number)
}
// Report whether this build is the newest one for the job. Any failure
// while probing the Jenkins model is logged and treated as "not last".
def is_last_build() {
    try {
        def newest = currentBuild.rawBuild.project.getLastBuild().number
        return currentBuild.number == newest
    } catch (Throwable t) {
        echo 'Error during check is_last_build ' + t.toString()
        return false
    }
}
// Perform a fresh SCM checkout into a cleaned workspace. Checkout is
// retried to ride out transient network errors; when `submodule` is
// true, submodules are also initialized (retried, 10-minute cap each).
def init_git(submodule = false) {
    cleanWs()
    retry(5) {
        checkout scm
    }
    if (!submodule) {
        return
    }
    retry(5) {
        timeout(time: 10, unit: 'MINUTES') {
            sh(script: 'git submodule update --init --recursive -f', label: 'Update git submodules')
        }
    }
}
// Run `test_closure` on a spot node type; if the spot attempt fails for
// any reason other than a plain Jenkins abort, retry once on on-demand
// capacity — but only when this build is still the newest for the job.
def run_with_spot_retry(spot_node_type, on_demand_node_type, test_name, test_closure) {
    try {
        test_closure(spot_node_type)
        return
    } catch (hudson.AbortException abort) {
        // A genuine user/system abort must not trigger the retry path.
        echo "Received normal AbortException, exit now: " + abort.toString()
        throw abort
    } catch (Throwable failure) {
        echo "Exception during SPOT run for ${test_name}: " + failure.toString()
        if (!is_last_build()) {
            echo 'Exit since it is not last build'
            throw failure
        }
        echo "Exception during SPOT run for ${test_name}: " + failure.toString() + " retry on-demand"
        // Reset the result so the on-demand rerun can report cleanly.
        currentBuild.result = 'SUCCESS'
        test_closure(on_demand_node_type)
    }
}
// stage('Lint') {
// node('CPU-SPOT') {
// ws(per_exec_ws('flashinfer-lint')) {
// init_git(false)
// }
// }
// }
// Build the AOT wheel and import-check it inside the CUDA-specific
// docker image on a CPU-only node (`--no-gpu`).
//
// node_type:    Jenkins agent label; a label containing 'SPOT' gets a
//               15-minute time-box on node *allocation* only.
// cuda_version: one of 'cu126' / 'cu128' / 'cu129' / 'cu130'; anything
//               else fails the build via error().
def run_unittest_CPU_AOT_COMPILE(node_type, cuda_version) {
    echo "Running CPU AOT Compile Unittest with CUDA ${cuda_version}"
    // Map lookup replaces the previous if/else chain; one place to edit
    // when a CUDA version is added or retired.
    def docker_images = [
        'cu126': docker_run_cu126,
        'cu128': docker_run_cu128,
        'cu129': docker_run_cu129,
        'cu130': docker_run_cu130,
    ]
    def docker_run = docker_images[cuda_version]
    if (docker_run == null) {
        error("Unknown CUDA version: ${cuda_version}")
    }
    // Shared test body, previously duplicated in both branches below.
    def run_tests = {
        ws(per_exec_ws('flashinfer-aot')) {
            init_git(true)
            sh(script: "ls -alh", label: 'Show work directory')
            sh(script: "./scripts/task_show_node_info.sh", label: 'Show node info')
            sh(script: "${docker_run} --no-gpu ./scripts/task_test_aot_build_import.sh", label: 'Test AOT Build and Import')
        }
    }
    if (node_type.contains('SPOT')) {
        def node_allocated = false
        try {
            // Time-box only the allocation probe: spot capacity may
            // never arrive, but once a node is obtainable the real run
            // below executes without a timeout.
            timeout(time: 15, unit: 'MINUTES') {
                node(node_type) {
                    node_allocated = true
                }
            }
            node(node_type) {
                run_tests()
            }
        } catch (Exception e) {
            if (!node_allocated) {
                echo "Node allocation timeout or failure after 15 minutes for ${node_type}: ${e.toString()}"
            }
            throw e
        }
    } else {
        // On-demand nodes: no allocation timeout needed.
        node(node_type) {
            run_tests()
        }
    }
}
// Run one shard of the JIT unit tests on a GPU node inside the
// CUDA-specific docker image.
//
// node_type:    Jenkins agent label; a label containing 'SPOT' gets a
//               15-minute time-box on node *allocation* only.
// shard_id:     selects scripts/task_jit_run_tests_part<shard_id>.sh.
// cuda_version: one of 'cu126' / 'cu128' / 'cu129'; cu130 is not
//               supported here (unlike the AOT path) and fails via error().
def shard_run_unittest_GPU(node_type, shard_id, cuda_version) {
    echo "Running unittest on ${node_type}, shard ${shard_id}, CUDA ${cuda_version}"
    def docker_images = [
        'cu126': docker_run_cu126,
        'cu128': docker_run_cu128,
        'cu129': docker_run_cu129,
    ]
    def docker_run = docker_images[cuda_version]
    if (docker_run == null) {
        error("Unknown CUDA version: ${cuda_version}")
    }
    // Shared test body, previously duplicated in both branches below.
    def run_tests = {
        ws(per_exec_ws('flashinfer-unittest')) {
            init_git(true) // we need cutlass submodule
            sh(script: "ls -alh", label: 'Show work directory')
            sh(script: "./scripts/task_show_node_info.sh", label: 'Show node info')
            // BUG FIX: the label was single-quoted ('... ${shard_id}'),
            // which Groovy does not interpolate, so the Jenkins UI
            // showed the literal text "${shard_id}". Double quotes fix it.
            sh(script: "${docker_run} ./scripts/task_jit_run_tests_part${shard_id}.sh", label: "JIT Unittest Part ${shard_id}")
        }
    }
    if (node_type.contains('SPOT')) {
        def node_allocated = false
        try {
            // Time-box only the allocation probe; the real run below
            // executes without a timeout once capacity exists.
            timeout(time: 15, unit: 'MINUTES') {
                node(node_type) {
                    node_allocated = true
                }
            }
            node(node_type) {
                run_tests()
            }
        } catch (Exception e) {
            if (!node_allocated) {
                echo "Node allocation timeout or failure after 15 minutes for ${node_type}: ${e.toString()}"
            }
            throw e
        }
    } else {
        // On-demand nodes: no allocation timeout needed.
        node(node_type) {
            run_tests()
        }
    }
}
// Top-level CI stage: runs all CPU AOT build/import checks (x86-64 and
// aarch64, every supported CUDA version) plus five GPU JIT unittest
// shards, all in parallel. Each branch first tries a spot node and, via
// run_with_spot_retry, falls back to on-demand capacity on failure.
stage('Unittest') {
// Abort superseded builds of the same branch before consuming nodes.
cancel_previous_build()
parallel(
// First branch failure aborts all remaining parallel branches.
failFast: true,
// CUDA 12.6 AOT Tests
'AOT-Build-Import-x86-64-cu126': {
run_with_spot_retry('CPU-LARGE-SPOT', 'CPU-LARGE', 'AOT-Build-Import-x86-64-cu126',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu126') })
},
'AOT-Build-Import-aarch64-cu126': {
run_with_spot_retry('ARM-LARGE-SPOT', 'ARM-LARGE', 'AOT-Build-Import-aarch64-cu126',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu126') })
},
// CUDA 12.8 AOT Tests
'AOT-Build-Import-x86-64-cu128': {
run_with_spot_retry('CPU-LARGE-SPOT', 'CPU-LARGE', 'AOT-Build-Import-x86-64-cu128',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu128') })
},
'AOT-Build-Import-aarch64-cu128': {
run_with_spot_retry('ARM-LARGE-SPOT', 'ARM-LARGE', 'AOT-Build-Import-aarch64-cu128',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu128') })
},
// CUDA 12.9 AOT Tests
'AOT-Build-Import-x86-64-cu129': {
run_with_spot_retry('CPU-LARGE-SPOT', 'CPU-LARGE', 'AOT-Build-Import-x86-64-cu129',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu129') })
},
'AOT-Build-Import-aarch64-cu129': {
run_with_spot_retry('ARM-LARGE-SPOT', 'ARM-LARGE', 'AOT-Build-Import-aarch64-cu129',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu129') })
},
// CUDA 13.0 AOT Tests
'AOT-Build-Import-x86-64-cu130': {
run_with_spot_retry('CPU-LARGE-SPOT', 'CPU-LARGE', 'AOT-Build-Import-x86-64-cu130',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu130') })
},
'AOT-Build-Import-aarch64-cu130': {
run_with_spot_retry('ARM-LARGE-SPOT', 'ARM-LARGE', 'AOT-Build-Import-aarch64-cu130',
{ node_type -> run_unittest_CPU_AOT_COMPILE(node_type, 'cu130') })
},
// JIT unittest only for cu129
// Shards 1-5 each run scripts/task_jit_run_tests_part<N>.sh on a
// GPU-G5 node; GPU capacity is the scarce resource, so only one CUDA
// version is exercised here.
'JIT-Unittest-1-cu129': {
run_with_spot_retry('GPU-G5-SPOT', 'GPU-G5', 'JIT-Unittest-1-cu129',
{ node_type -> shard_run_unittest_GPU(node_type, 1, 'cu129') })
},
'JIT-Unittest-2-cu129': {
run_with_spot_retry('GPU-G5-SPOT', 'GPU-G5', 'JIT-Unittest-2-cu129',
{ node_type -> shard_run_unittest_GPU(node_type, 2, 'cu129') })
},
'JIT-Unittest-3-cu129': {
run_with_spot_retry('GPU-G5-SPOT', 'GPU-G5', 'JIT-Unittest-3-cu129',
{ node_type -> shard_run_unittest_GPU(node_type, 3, 'cu129') })
},
'JIT-Unittest-4-cu129': {
run_with_spot_retry('GPU-G5-SPOT', 'GPU-G5', 'JIT-Unittest-4-cu129',
{ node_type -> shard_run_unittest_GPU(node_type, 4, 'cu129') })
},
'JIT-Unittest-5-cu129': {
run_with_spot_retry('GPU-G5-SPOT', 'GPU-G5', 'JIT-Unittest-5-cu129',
{ node_type -> shard_run_unittest_GPU(node_type, 5, 'cu129') })
},
)
}