local source code for flashinfer_0.3.1 && torch vision_0.22.1
This commit is contained in:
parent
1c59c1ab7d
commit
06e45b5ff9
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
BasedOnStyle: Google
|
||||
DerivePointerAlignment: false
|
||||
ColumnLimit: 100
|
||||
PointerAlignment: Left
|
||||
# InsertNewlineAtEOF: true
|
||||
...
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
FROM nvidia/cuda:12.8.0-devel-ubuntu24.04
|
||||
|
||||
# Update package lists and install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
clang-format \
|
||||
clangd-19 \
|
||||
vim \
|
||||
zsh \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install oh-my-zsh
|
||||
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended
|
||||
|
||||
# Install zsh-autosuggestions
|
||||
RUN git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions
|
||||
|
||||
# Configure zsh
|
||||
RUN sed -i 's/ZSH_THEME="robbyrussell"/ZSH_THEME="fino-time"/' ~/.zshrc && \
|
||||
sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions)/' ~/.zshrc
|
||||
|
||||
# Create a non-root user
|
||||
ARG USERNAME=devuser
|
||||
ARG USER_UID=1003
|
||||
ARG USER_GID=$USER_UID
|
||||
|
||||
# Create the user
|
||||
RUN groupadd --gid $USER_GID $USERNAME \
|
||||
&& useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
|
||||
# [Optional] Add sudo support
|
||||
&& apt-get update \
|
||||
&& apt-get install -y sudo \
|
||||
&& echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
|
||||
&& chmod 0440 /etc/sudoers.d/$USERNAME \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Remove default 'ubuntu' user (UID 1000) to prevent devcontainer permission conflicts
|
||||
# Ref: https://github.com/rapidsai/devcontainers/pull/373
|
||||
RUN if grep ubuntu:x:1000:1000 /etc/passwd >/dev/null; then userdel -f -r ubuntu; fi
|
||||
|
||||
# Copy zsh configuration to the new user's home
|
||||
RUN cp -r /root/.oh-my-zsh /home/$USERNAME/.oh-my-zsh && \
|
||||
cp /root/.zshrc /home/$USERNAME/.zshrc && \
|
||||
chown -R $USERNAME:$USERNAME /home/$USERNAME/.oh-my-zsh && \
|
||||
chown $USERNAME:$USERNAME /home/$USERNAME/.zshrc
|
||||
|
||||
# Switch to non-root user
|
||||
USER $USERNAME
|
||||
WORKDIR /home/$USERNAME
|
||||
|
||||
# Install python
|
||||
COPY install/install_python.sh /install/install_python.sh
|
||||
RUN bash /install/install_python.sh py312
|
||||
|
||||
# clangd
|
||||
ENV PATH="/usr/lib/llvm-19/bin:$PATH"
|
||||
# conda
|
||||
ENV PATH="/home/devuser/conda/bin:$PATH"
|
||||
ENV PATH="/home/devuser/conda/envs/py312/bin:$PATH"
|
||||
|
||||
# Install python packages
|
||||
COPY install/install_python_packages.sh /install/install_python_packages.sh
|
||||
RUN bash /install/install_python_packages.sh
|
||||
RUN echo "source activate py312" >> /home/devuser/.zshrc
|
||||
|
||||
# Set zsh as default shell
|
||||
ENV SHELL=/bin/zsh
|
||||
CMD [ "zsh" ]
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "CUDA Development Container",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile",
|
||||
"context": "."
|
||||
},
|
||||
"runArgs": [
|
||||
"--gpus=all"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"ms-python.python",
|
||||
"ms-python.black-formatter",
|
||||
"nvidia.nsight-vscode-edition"
|
||||
]
|
||||
}
|
||||
},
|
||||
"mounts": [
|
||||
"type=bind,source=${localEnv:HOME}/.ssh,target=/home/devuser/.ssh,readonly"
|
||||
],
|
||||
"remoteUser": "devuser"
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
#!/bin/bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
|
||||
# Install python and pip. Don't modify this to add Python package dependencies; add those to install_python_packages.sh instead.
|
||||
wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
|
||||
bash Miniforge3.sh -b -p /home/devuser/conda
|
||||
|
||||
# Create the conda environment named by the first script argument.
# -y answers the confirmation prompt explicitly (image builds have no TTY); "$1" is quoted to avoid word splitting.
/home/devuser/conda/bin/conda create -y -n "$1" python=3.12
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
#!/bin/bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
set -e
|
||||
set -u
|
||||
|
||||
pip3 install nvidia-cutlass-dsl
|
||||
pip3 install ninja pytest numpy scipy build cuda-python
|
||||
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
|
||||
pip3 install pre-commit
|
||||
pip3 install nvidia-cudnn-cu12==9.1.0.70
|
||||
|
|
@ -0,0 +1,186 @@
|
|||
# ci
|
||||
flashinfer-whl/
|
||||
dist/
|
||||
|
||||
# Compile commands json file
|
||||
compile_commands.json
|
||||
|
||||
# Generated files
|
||||
csrc/generated/
|
||||
csrc/nv_internal/tensorrt_llm/cutlass_instantiations/
|
||||
docs/generated/
|
||||
flashinfer/_build_meta.py
|
||||
flashinfer/data/
|
||||
flashinfer/jit/aot_config.py
|
||||
aot-ops/
|
||||
csrc/aot_default_additional_params.h
|
||||
|
||||
# DS_Store files
|
||||
.DS_Store
|
||||
|
||||
# Microbenchmark files
|
||||
microbenchmark/
|
||||
|
||||
# vscode
|
||||
.vscode/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
# To use:
|
||||
#
|
||||
# pre-commit run -a
|
||||
#
|
||||
# Or:
|
||||
#
|
||||
# pre-commit install # (runs every time you commit in git)
|
||||
#
|
||||
# To update this file:
|
||||
#
|
||||
# pre-commit autoupdate
|
||||
#
|
||||
# See https://github.com/pre-commit/pre-commit
|
||||
# Note the pre-commit hooks should only be used for formatting, but not for linting.
|
||||
# For linting consider using CI.
|
||||
repos:
|
||||
# Standard hooks
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-case-conflict
|
||||
- id: check-merge-conflict
|
||||
- id: check-symlinks
|
||||
- id: end-of-file-fixer
|
||||
- id: mixed-line-ending
|
||||
- id: requirements-txt-fixer
|
||||
- id: trailing-whitespace
|
||||
|
||||
# Changes tabs to spaces
|
||||
- repo: https://github.com/Lucas-C/pre-commit-hooks
|
||||
rev: v1.5.5
|
||||
hooks:
|
||||
- id: remove-tabs
|
||||
- id: remove-crlf
|
||||
|
||||
# Formatters
|
||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
||||
rev: v19.1.1
|
||||
hooks:
|
||||
- id: clang-format
|
||||
types_or: [c++, c, cuda]
|
||||
exclude: |
|
||||
(?x)^(3rdparty/.* | flashinfer/jit/aot_config\.py)$
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: 'v1.17.1' # Use the sha / tag you want to point at
|
||||
hooks:
|
||||
- id: mypy
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version.
|
||||
rev: v0.12.8
|
||||
hooks:
|
||||
# Run the linter.
|
||||
- id: ruff-check
|
||||
# Run the formatter.
|
||||
- id: ruff-format
|
||||
types_or: [ python, pyi ]
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
name: Bug Report
|
||||
description: Create a bug report to help us improve CUTLASS
|
||||
title: "[BUG] "
|
||||
labels: ["? - Needs Triage", "bug"]
|
||||
assignees: []
|
||||
|
||||
body:
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
label: Which component has the problem?
|
||||
options:
|
||||
- CuTe DSL
|
||||
- CUTLASS C++
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: bug-report
|
||||
attributes:
|
||||
label: Bug Report
|
||||
description: Please fill out all sections below
|
||||
value: |
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**Steps/Code to reproduce bug**
|
||||
Follow this guide http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports to craft a minimal bug report. This helps us reproduce the issue you're having and resolve the issue more quickly.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Environment details (please complete the following information):**
|
||||
- Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)]
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
validations:
|
||||
required: true
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: CUTLASS Discord
|
||||
url: https://discord.gg/nvidiadeveloper
|
||||
about: Come chat about using and contributing to CUTLASS!
|
||||
35
flashinfer_0.3.1/3rdparty/cutlass/.github/ISSUE_TEMPLATE/documentation_request.md
vendored
Normal file
35
flashinfer_0.3.1/3rdparty/cutlass/.github/ISSUE_TEMPLATE/documentation_request.md
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
---
|
||||
name: Documentation request
|
||||
about: Report incorrect or needed documentation to improve CUTLASS
|
||||
title: "[DOC]"
|
||||
labels: "? - Needs Triage, documentation"
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Report incorrect documentation
|
||||
|
||||
**Location of incorrect documentation**
|
||||
Provide links and line numbers if applicable.
|
||||
|
||||
**Describe the problems or issues found in the documentation**
|
||||
A clear and concise description of what you found to be incorrect.
|
||||
|
||||
**Steps taken to verify documentation is incorrect**
|
||||
List any steps you have taken:
|
||||
|
||||
**Suggested fix for documentation**
|
||||
Detail proposed changes to fix the documentation if you have any.
|
||||
|
||||
---
|
||||
|
||||
## Report needed documentation
|
||||
|
||||
**Report needed documentation**
|
||||
A clear and concise description of what documentation you believe it is needed and why.
|
||||
|
||||
**Describe the documentation you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Steps taken to search for needed documentation**
|
||||
List any steps you have taken:
|
||||
35
flashinfer_0.3.1/3rdparty/cutlass/.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
35
flashinfer_0.3.1/3rdparty/cutlass/.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
name: Feature Request
|
||||
description: Suggest an idea for CUTLASS
|
||||
title: "[FEA] "
|
||||
labels: ["? - Needs Triage", "feature request"]
|
||||
assignees: []
|
||||
|
||||
body:
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
label: Which component requires the feature?
|
||||
options:
|
||||
- CuTe DSL
|
||||
- CUTLASS C++
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: feature-request
|
||||
attributes:
|
||||
label: Feature Request
|
||||
description: Please fill out all sections below
|
||||
value: |
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I wish I could use CUTLASS to do [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context, code examples, or references to existing implementations about the feature request here.
|
||||
validations:
|
||||
required: true
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
name: Submit question
|
||||
about: Ask a general question about CUTLASS
|
||||
title: "[QST]"
|
||||
labels: "? - Needs Triage, question"
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**What is your question?**
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
name: Auto Label Issues
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
add-labels:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
steps:
|
||||
- name: Add component label
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const issue = context.payload.issue;
|
||||
const body = issue.body || '';
|
||||
|
||||
// Parse the issue body to find the component selection
|
||||
// GitHub renders dropdown selections as "### {label}\n\n{selection}"
|
||||
// Check for both bug report and feature request dropdown labels
|
||||
const bugComponentMatch = body.match(/### Which component has the problem\?\s*\n\s*\n\s*(.+?)(?:\n|$)/);
|
||||
const featureComponentMatch = body.match(/### Which component requires the feature\?\s*\n\s*\n\s*(.+?)(?:\n|$)/);
|
||||
|
||||
const componentMatch = bugComponentMatch || featureComponentMatch;
|
||||
|
||||
if (componentMatch) {
|
||||
const component = componentMatch[1].trim();
|
||||
let label = '';
|
||||
|
||||
// Map component selections to labels
|
||||
switch(component) {
|
||||
case 'CuTe DSL':
|
||||
label = 'CuTe DSL';
|
||||
break;
|
||||
case 'CUTLASS C++':
|
||||
label = 'CUTLASS C++';
|
||||
break;
|
||||
}
|
||||
|
||||
if (label) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: issue.number,
|
||||
labels: [label]
|
||||
});
|
||||
console.log(`Added label: ${label}`);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
#################################################################################################
|
||||
#
|
||||
# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
|
||||
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
|
||||
name: Blossom-CI
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
platform:
|
||||
description: 'runs-on argument'
|
||||
required: false
|
||||
args:
|
||||
description: 'argument'
|
||||
required: false
|
||||
|
||||
jobs:
|
||||
Authorization:
|
||||
name: Authorization
|
||||
runs-on: blossom
|
||||
outputs:
|
||||
args: ${{ env.args }}
|
||||
|
||||
# This job only runs for pull request comments
|
||||
if: |
|
||||
(startsWith(github.event.comment.body, '/bot run') ||
|
||||
startsWith(github.event.comment.body, '/bot kill')) && contains(
|
||||
fromJson('["nv-fastkernels-cicd", "zekunf-nv", "hwu36", "IonThruster", "thakkarV", "d-k-b", "mihir-awatramani", "fengxie", "vickiw973", "Junkai-Wu", "brandon-yujie-sun", "lijingticy22", "hongw-nv", "vikgupta-nv", "IwakuraRein", "depaulmillz", "jackkosaian", "itramble", "ccecka", "sxtyzhangzk", "hbarclay", "yzhaiustc", "x86vk", "sklevtsov-nvidia", "ANIKET-SHIVAM", "Shreya-gaur", "azhurkevich", "serifyesil", "richardmcai", "lsyyy666", "Ethan-Yan27", "XiaoSong9905", "shdetect", "keithzzzzz"]'),
|
||||
github.actor)
|
||||
steps:
|
||||
- name: Check if comment is issued by authorized person
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'AUTH'
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
|
||||
|
||||
Vulnerability-scan:
|
||||
name: Vulnerability scan
|
||||
needs: [Authorization]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
|
||||
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
|
||||
lfs: 'true'
|
||||
|
||||
- name: Run blossom action
|
||||
uses: NVIDIA/blossom-action@main
|
||||
env:
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
|
||||
with:
|
||||
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
|
||||
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
|
||||
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
|
||||
|
||||
Job-trigger:
|
||||
name: Start ci job
|
||||
needs: [Vulnerability-scan]
|
||||
runs-on: blossom
|
||||
steps:
|
||||
- name: Start ci job
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'START-CI-JOB'
|
||||
CI_SERVER: ${{ secrets.CI_SERVER }}
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
Upload-Log:
|
||||
name: Upload log
|
||||
runs-on: blossom
|
||||
if : github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'POST-PROCESSING'
|
||||
CI_SERVER: ${{ secrets.CI_SERVER }}
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
name: "Pull Request Labeler"
|
||||
on:
|
||||
- pull_request_target
|
||||
|
||||
jobs:
|
||||
triage:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/labeler@main
|
||||
with:
|
||||
repo-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
35
flashinfer_0.3.1/3rdparty/cutlass/.github/workflows/new-issues-to-triage-projects.yml
vendored
Normal file
35
flashinfer_0.3.1/3rdparty/cutlass/.github/workflows/new-issues-to-triage-projects.yml
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
name: Auto Assign New Issues to Triage Project
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
jobs:
|
||||
assign_one_project:
|
||||
runs-on: ubuntu-latest
|
||||
name: Assign to New Issues to Triage Project
|
||||
steps:
|
||||
- name: Process bug issues
|
||||
uses: docker://takanabe/github-actions-automate-projects:v0.0.1
|
||||
if: contains(github.event.issue.labels.*.name, 'bug') && contains(github.event.issue.labels.*.name, '? - Needs Triage')
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_PROJECT_URL: https://github.com/NVIDIA/cutlass
|
||||
GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
|
||||
- name: Process feature issues
|
||||
uses: docker://takanabe/github-actions-automate-projects:v0.0.1
|
||||
if: contains(github.event.issue.labels.*.name, 'feature request') && contains(github.event.issue.labels.*.name, '? - Needs Triage')
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_PROJECT_URL: https://github.com/NVIDIA/cutlass
|
||||
GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
|
||||
- name: Process other issues
|
||||
uses: docker://takanabe/github-actions-automate-projects:v0.0.1
|
||||
if: contains(github.event.issue.labels.*.name, '? - Needs Triage') && (!contains(github.event.issue.labels.*.name, 'bug') && !contains(github.event.issue.labels.*.name, 'feature request'))
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_PROJECT_URL: https://github.com/NVIDIA/cutlass
|
||||
GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
name: Mark inactive issues and pull requests
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 * * * *"
|
||||
|
||||
jobs:
|
||||
mark-inactive-30d:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Mark 30 day inactive issues and pull requests
|
||||
uses: actions/stale@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
stale-issue-message: >
|
||||
This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days.
|
||||
Please close this issue if no further response or action is needed.
|
||||
Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
|
||||
This issue will be labeled `inactive-90d` if there is no activity in the next 60 days.
|
||||
stale-issue-label: "inactive-30d"
|
||||
exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: -1
|
||||
stale-pr-message: >
|
||||
This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days.
|
||||
Please close this PR if it is no longer required.
|
||||
Otherwise, please respond with a comment indicating any updates.
|
||||
This PR will be labeled `inactive-90d` if there is no activity in the next 60 days.
|
||||
stale-pr-label: "inactive-30d"
|
||||
exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"
|
||||
days-before-pr-stale: 30
|
||||
days-before-pr-close: -1
|
||||
operations-per-run: 50
|
||||
mark-inactive-90d:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Mark 90 day inactive issues and pull requests
|
||||
uses: actions/stale@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
stale-issue-message: >
|
||||
This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days.
|
||||
Please close this issue if no further response or action is needed.
|
||||
Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
|
||||
stale-issue-label: "inactive-90d"
|
||||
exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"
|
||||
days-before-issue-stale: 90
|
||||
days-before-issue-close: -1
|
||||
stale-pr-message: >
|
||||
This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days.
|
||||
Please close this PR if it is no longer required.
|
||||
Otherwise, please respond with a comment indicating any updates.
|
||||
stale-pr-label: "inactive-90d"
|
||||
exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"
|
||||
days-before-pr-stale: 90
|
||||
days-before-pr-close: -1
|
||||
operations-per-run: 50
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
# PyCache files
|
||||
__pycache__/
|
||||
cutlass_library.egg-info/
|
||||
/build*
|
||||
|
|
@ -0,0 +1,742 @@
|
|||
# Changelog
|
||||
|
||||
# CUTLASS 4.x
|
||||
|
||||
## [4.2.0](https://github.com/NVIDIA/cutlass/tree/main) (2025-08-21)
|
||||
|
||||
### CuTe DSL
|
||||
* We will likely skip the 4.2.dev release and directly target 4.2.
|
||||
* CuTeDSL version remains at 4.1.0 till then.
|
||||
|
||||
### CUTLASS C++
|
||||
* Add K major scale factor support for Hopper SM90 blockwise kernels.
|
||||
* Further enhance Blackwell SM100 Attention kernels in [example 77](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/).
|
||||
- Add fused reduction kernel support for cutlass MLA.
|
||||
- Fix an issue where `get_unmasked_trip_count` may return a negative value.
|
||||
- Fix an issue where mbarriers are initialized with a zero arrival count.
|
||||
* Add Blackwell SM120 blockwise gemm kernel example: [example 87](https://github.com/NVIDIA/cutlass/tree/main/examples/87_blackwell_geforce_gemm_blockwise/).
|
||||
* Support for Blackwell SM100 cpasync kernel.
|
||||
- Collective mainloop codes: [cpasync mainloop](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_mma_cpasync_warpspecialized.hpp).
|
||||
- Kernel codes: [cpasync kernel](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_gemm_cpasync_warpspecialized.hpp).
|
||||
* Support for Blackwell SM121 kernels for DGX Spark GPUs.
|
||||
- Share the major codes with Blackwell SM120 kernels.
|
||||
* Support for Blackwell SM100 legacy mixed input GEMM kernels.
|
||||
- Collective mainloop codes: [Mixed input mainloop](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_mma_warpspecialized_mixed_input.hpp).
|
||||
- Kernel codes: [Mixed input kernel](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_gemm_tma_warpspecialized_mixed_input_transform.hpp).
|
||||
- Example codes: [example 86](https://github.com/NVIDIA/cutlass/tree/main/examples/86_blackwell_mixed_dtype_gemm/).
|
||||
* Support for Blackwell SM100 fp4 gemv kernels.
|
||||
- Kernel codes: [Gemv kernel](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/gemv_blockscaled.h).
|
||||
- Example codes: [example 91](https://github.com/NVIDIA/cutlass/tree/main/examples/91_fp4_gemv/)
|
||||
* From CUDA 13.0, the Blackwell SM101 for Thor GPUs is renamed to SM110.
|
||||
- For CUDA toolkit version < 13.0, SM101 is still used for Thor GPUs.
|
||||
- For CUDA toolkit version >= 13.0, SM110 is used for Thor GPUs and SM101 is no longer valid.
|
||||
* CuTe changes:
|
||||
- Fix inaccurate GridDim calculation under [CuTe tutorial](https://github.com/NVIDIA/cutlass/tree/main/examples/cute/tutorial/blackwell/).
|
||||
- Add [movmatrix](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-movmatrix) support.
|
||||
- Fix smallest MMA-N allowed for Blackwell fp8 and fp16 gemm kernels.
|
||||
- Support fp16 accumulator for sm89 fp8 mma.
|
||||
- Shorten `nullspace` implementation.
|
||||
- Isolate and comment on `cosize` hacks.
|
||||
- Important documentation correction: `E<0,1> == 1@0@1`.
|
||||
* Add support for heuristics-based kernel filtering and autotuning using `nvidia-matmul-heuristics`.
|
||||
- For details, please refer to the [heuristics doc](https://github.com/NVIDIA/cutlass/tree/main/media/docs/cpp/heuristics.md).
|
||||
* Rename legacy Python API package from `cutlass` to `cutlass_cppgen`.
|
||||
* Fix some profiler issues:
|
||||
- Modify default cluster callback values to non-zero to avoid profiler failure when these values are not set on the command line.
|
||||
- Fix some no output and timeout issues.
|
||||
* Add following unit tests:
|
||||
- [fp16 accumulator for sm89 fp8 mma](https://github.com/NVIDIA/cutlass/tree/main/test/unit/cute/ampere/cooperative_gemm.cu)
|
||||
- [movmatrix test](https://github.com/NVIDIA/cutlass/tree/main/test/unit/cute/turing/movm.cu)
|
||||
- [fp16 narrow mma n](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm100_tensorop_gemm/f16_f16_void_f32_narrow_mma_n.cu) and [fp8 narrow mma n](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm100_tensorop_gemm/f8_f8_void_bf16_narrow_mma_n.cu)
|
||||
* Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs!
|
||||
* Optimal code generation with CUDA toolkit versions 13.0.
|
||||
|
||||
## [4.1.0](https://github.com/NVIDIA/cutlass/releases/tag/v4.1.0) (2025-07-16)
|
||||
|
||||
### CuTe DSL
|
||||
* Add aarch64 support, you can now pip install `nvidia-cutlass-dsl` on GB200 systems!
|
||||
* More examples demonstrating how to use CuTe DSL to write peak-performance kernels
|
||||
- [Blackwell Mamba2 SSD](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/blackwell/mamba2_ssd/mamba2_ssd.py)
|
||||
- [Blackwell SM100 persistent dense blockscaled GEMM with static scheduling](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/blackwell/dense_blockscaled_gemm_persistent.py)
|
||||
* API updates
|
||||
- Please refer to [DSL API changelog](https://docs.nvidia.com/cutlass/media/docs/pythonDSL/cute_dsl_api/changelog.html) for details
|
||||
|
||||
### CUTLASS C++
|
||||
* Further enhance Blackwell SM100 Attention kernels in [example 77](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/).
|
||||
- Add variable sequence length support for FMHA Backward kernel.
|
||||
- Add varlen test support to Backward runner.
|
||||
- Codes support empty batch sequences.
|
||||
* Replace `subbyte_iterator` with `cute::recast_ptr` when constructing logical iterators/arrays.
|
||||
* CuTe changes:
|
||||
- Rewrite ArithTuple and ScaledBasis for robustness and clarity.
|
||||
- Remove buggy and kludgy `get_layoutA|B|C_MN` and friends from Atoms/TiledX.
|
||||
- Factor out `print_latex` and friends and rewrite.
|
||||
- Factor out `print_svg` and friends and rewrite.
|
||||
* Support Blackwell SM100 SIMT packed fp32x2 kernels.
|
||||
* Support residual add for implicit gemm kernels.
|
||||
* Various fixes for CUTLASS C++ Python interface's EVT tracer:
|
||||
- Add verifier for sm90 to report the invalid input.
|
||||
- When adding an edge to the graph, if the edge already exists, add an identity compute node to avoid having multiple parallel edges.
|
||||
- Register operations of tanh, sigmoid, exp, gelu to the python ast frontend.
|
||||
- Replace the NotImplemented Error by packing all nodes into a single topological visitor node as a fallback.
|
||||
* Fix profiler bugs in exhaustive perf search.
|
||||
- Fix incorrect cluster shape output issue when doing exhaustive search.
|
||||
- Fix a bug in profiler grouped GEMM for setting tile scheduler swizzles, cluster shapes, and raster orders.
|
||||
* Fix some profiler issues.
|
||||
- Complete the reference for Blackwell blockwise gemm kernels.
|
||||
- Fix incorrect regex logic for L1 test.
|
||||
* Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs!
|
||||
* Optimal code generation with CUDA toolkit versions 12.9.
|
||||
|
||||
## [4.0.0](https://github.com/NVIDIA/cutlass/releases/tag/v4.0.0) (2025-06-03)
|
||||
|
||||
### CuTe DSL
|
||||
* CuTe DSL, a Python DSL centered around CuTe's abstractions
|
||||
- [Core DSL implementation files](https://github.com/NVIDIA/cutlass/tree/main/python/CuTeDSL)
|
||||
- [DSL quick start](https://docs.nvidia.com/cutlass/media/docs/pythonDSL/quick_start.html)
|
||||
- [DSL Overview](https://docs.nvidia.com/cutlass/media/docs/pythonDSL/overview.html)
|
||||
* [Overhauled documentation with a new dedicated website](https://docs.nvidia.com/cutlass)
|
||||
* Set of examples demonstrating how to use CuTe DSL to write peak-performance kernels
|
||||
- [Blackwell SM100 persistent dense GEMM with static scheduling](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/blackwell/dense_gemm_persistent.py)
|
||||
- [Blackwell SM100 grouped GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/blackwell/grouped_gemm.py)
|
||||
- [Blackwell SM100 fused multi-head attention forward pass](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/blackwell/fmha.py)
|
||||
- [Hopper GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/hopper/dense_gemm.py)
|
||||
- [Ampere GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/ampere/tensorop_gemm.py)
|
||||
- [FlashAttention-2 implementation targeting Ampere and Ada class GPUs (SM80, SM86, SM89)](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/ampere/flash_attention_v2.py)
|
||||
- [SmemAllocator to facilitate shared memory allocation and management](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/ampere/smem_allocator.py)
|
||||
- [C-structure based customized interface between JIT function and user codes](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/cute/ffi/jit_argument.py)
|
||||
* [Educational notebooks for getting started with CuTe DSL](https://github.com/NVIDIA/cutlass/tree/main/examples/python/CuTeDSL/notebooks)
|
||||
* API updates
|
||||
- Please refer to [DSL API changelog](https://docs.nvidia.com/cutlass/media/docs/pythonDSL/cute_dsl_api/changelog.html) for details
|
||||
|
||||
### CUTLASS C++
|
||||
* Support [Family Specific Architecture Features](https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features/) which was introduced in CUDA 12.9
|
||||
- 100f, 101f, 120f were added to support Family Specific Architecture Features which allows running the same binary on different chips belonging to the same Family (e.g. sm100) without recompiling. Note 101a is supported since CUTLASS 3.9
|
||||
* Instruction shapes and redundant accumulation type have been removed from CUTLASS 3.x-style library kernel names to disambiguate kernels and shorten names.
|
||||
- For example:
|
||||
+ `(old) cutlass3x_sm90_tensorop_s64x128x16gemm_bf16_bf16_f32_bf16_bf16_128x256x64_1x1x1_0_tnn_align8_warpspecialized_cooperative_epi_tma`
|
||||
+ `(new) cutlass3x_sm90_tensorop_gemm_bf16_bf16_f32_bf16_bf16_128x256x64_1x1x1_0_tnn_align8_warpspecialized_cooperative_epi_tma`
|
||||
- If you are using the CUTLASS library kernel names directly (e.g. to compile a subset of the CUTLASS library with `-DCUTLASS_LIBRARY_KERNELS`, filter kernels in the CUTLASS profiler with `--kernels`), please update your uses accordingly, this is a breaking change.
|
||||
* Further improved [Blockwise](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling.cu) and [Groupwise](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_groupwise_scaling.cu) GEMMs on Hopper and Blackwell.
|
||||
- Added non-power-of-two tile sizes.
|
||||
- Improved performance for K-major scale factors.
|
||||
- The argument `mma_promotion_interval` has been removed from non-grouped GEMM to align with the grouped and Blackwell SM100 versions.
|
||||
* Enhance Blackwell SM100 Attention kernels in [example 77](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/).
|
||||
- Support LSE output in FMHA Forward kernel.
|
||||
- Enhance performance measurement: support of different warmup iterations; buffer rotation to keep L2 cold; separate testing of persistent and non-persistent.
|
||||
- Enhance testing of variable sequence length.
|
||||
- Disable B2B mode in MLA to simplify the sample.
|
||||
- Clarify that `fmha_gen` sample only supports head dim 128.
|
||||
- Fixes for split-kv output in MLA.
|
||||
* Improve Blackwell and Hopper grouped GEMM performance, functionality, and profiler support.
|
||||
- Enable runtime datatype for Blackwell SM100 grouped GEMM. Profiler support is also added.
|
||||
- Enable kernel parameter exploration for Blackwell SM100 grouped GEMM - raster_order, swizzle.
|
||||
* Add [Blackwell SM100 implicit GEMM conv fprop/dgrad/wgrad unit tests](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device_3x/).
|
||||
* Add dynamic and preferred cluster support for convolution Blackwell SM100 kernels.
|
||||
* Fix profiler issues which cause no output or not supported error for some kernels.
|
||||
* Optimizations for Blackwell SM100 and SM120 block scaled kernels.
|
||||
* Support for Blackwell SM120 blockwise dense gemm in CUTLASS library and profiler.
|
||||
* New [Hopper SM90 FMHA example](https://github.com/NVIDIA/cutlass/tree/main/examples/88_hopper_fmha/), similar in design to the existing [Blackwell FMHA](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/).
|
||||
* CuTe changes:
|
||||
- Rework `cute::copy_if` so that the predicate tensor is also a true CuTe Tensor rather than a lambda, and introduce transform-tensors to avoid any extra register or load/store overhead in using bool-tensors.
|
||||
- New [CuTe tutorial](https://github.com/NVIDIA/cutlass/tree/main/examples/cute/tutorial/tiled_copy_if.cu) to show the usage of copy_if in tile copy.
|
||||
- Add [CuTe C++ reduce op](https://github.com/NVIDIA/cutlass/tree/main/include/cute/algorithm/tensor_reduce.hpp).
|
||||
- Add several [unit tests](https://github.com/NVIDIA/cutlass/tree/main/test/unit/cute/core/tensor_algs.cpp) for CuTe tensor algorithms.
|
||||
* Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs!
|
||||
* Optimal code generation with CUDA toolkit versions 12.9.
|
||||
|
||||
|
||||
# CUTLASS 3.x
|
||||
|
||||
## [3.9.2](https://github.com/NVIDIA/cutlass/releases/tag/v3.9.2) (2025-05-03)
|
||||
* Fixed [Blockwise](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling.cu) and [Groupwise](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_groupwise_scaling.cu) GEMM hang issue when problem size K is 128.
|
||||
* Optimal code generation with CUDA toolkit versions 12.9.
|
||||
|
||||
## [3.9.1](https://github.com/NVIDIA/cutlass/releases/tag/v3.9.1) (2025-04-30)
|
||||
* Fixed Group Gemm hang issue in CUTLASS 3.x
|
||||
* Improved Hopper [Blockwise](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling.cu) and [Groupwise](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_groupwise_scaling.cu) GEMM performance.
|
||||
|
||||
## [3.9.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.9.0) (2025-04-24)
|
||||
|
||||
* Support for Blackwell SM120 kernels for GeForce GPUs in CUTLASS 3.x API:
|
||||
- Collective mainloops that target:
|
||||
* [Blockscaled datatypes with support for dense GEMM](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm120_blockscaled_mma_tma.hpp)
|
||||
* [Blockscaled datatypes with support for sparse GEMM](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm120_blockscaled_sparse_mma_tma.hpp)
|
||||
- New [GEMM](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/dispatch_policy.hpp) and [epilogue](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/dispatch_policy.hpp) dispatch policies for collectives, kernel layers, and builders.
|
||||
- [Blackwell SM120 epilogue](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/fusion/sm120_visitor_store_tma_warpspecialized.hpp) and [full set of EVT fusions](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/fusion/sm120_callbacks_tma_warpspecialized.hpp).
|
||||
* Set of examples that demonstrate the usage of the 3.x API for targeting Blackwell SM120 architecture:
|
||||
- [Blockscaled GEMM with NVFP4 input datatype and BF16 output tensor](https://github.com/NVIDIA/cutlass/tree/main/examples/79_blackwell_geforce_gemm/79a_blackwell_geforce_nvfp4_bf16_gemm.cu).
|
||||
- [Blockscaled GEMM with NVFP4 input datatype and NVFP4 output tensor with scale factor generation](https://github.com/NVIDIA/cutlass/tree/main/examples/79_blackwell_geforce_gemm/79b_blackwell_geforce_nvfp4_nvfp4_gemm.cu).
|
||||
- [Blockscaled GEMM with mixed input datatype (MXFP8 and MXFP6) and BF16 output tensor](https://github.com/NVIDIA/cutlass/tree/main/examples/79_blackwell_geforce_gemm/79c_blackwell_geforce_mixed_mxfp8_mxfp6_bf16_gemm.cu).
|
||||
- [Grouped GEMM with nvfp4 datatype](https://github.com/NVIDIA/cutlass/tree/main/examples/79_blackwell_geforce_gemm/79d_blackwell_geforce_nvfp4_grouped_gemm.cu).
|
||||
- [Sparse Blockscaled GEMM with mxfp8 input datatype and BF16 output tensor](https://github.com/NVIDIA/cutlass/tree/main/examples/80_blackwell_geforce_sparse_gemm/80a_blackwell_geforce_mxfp8_bf16_sparse_gemm.cu).
|
||||
- [Sparse Blockscaled GEMM with NVFP4 input datatype and NVFP4 output tensor](https://github.com/NVIDIA/cutlass/tree/main/examples/80_blackwell_geforce_sparse_gemm/80b_blackwell_geforce_nvfp4_nvfp4_sparse_gemm.cu).
|
||||
* Set of unit tests that demonstrate the usage of both [sparse](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm120_blockscaled_sparse_tensorop_gemm/) and [dense](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm120_blockscaled_tensorop_gemm/) Blackwell SM120 blockscaled GEMM.
|
||||
* Support for Blackwell SM100 Sparse kernels:
|
||||
- Collective mainloops that target:
|
||||
* [SM100 Sparse GEMM](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_sparse_mma_warpspecialized.hpp)
|
||||
* Set of examples that demonstrate the usage of the 3.x API for targeting Blackwell SM100 Sparse GEMM:
|
||||
- [Sparse GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/83_blackwell_sparse_gemm/83_blackwell_sparse_gemm.cu)
|
||||
- [Blockscaled Sparse GEMM with NVFP4 input data type](https://github.com/NVIDIA/cutlass/tree/main/examples/84_blackwell_narrow_precision_sparse_gemm/84a_blackwell_nvfp4_bf16_sparse_gemm.cu)
|
||||
- [Blockscaled Sparse GEMM with mixed input data type (MXFP8 and MXFP4)](https://github.com/NVIDIA/cutlass/tree/main/examples/84_blackwell_narrow_precision_sparse_gemm/84b_blackwell_mixed_mxfp8_bf16_sparse_gemm.cu)
|
||||
* Set of unit tests that demonstrate the usage of [sparse](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm100_sparse_tensorop_gemm) and [blockscaled sparse](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm100_blockscaled_sparse_tensorop_gemm) Blackwell SM100 GEMM.
|
||||
* A new Multi-head Latent Attention (MLA) for SM100 Blackwell architecture in CUTLASS [example](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/) covers the flashMLA-like weight-absorbed decoding use-case.
|
||||
* A new FMHA Backward kernel for SM100 Blackwell architecture extends CUTLASS [example](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/) to show how the five backward pass MMAs can be fused into a single kernel to achieve high performance.
|
||||
* A new [distributed GEMM example](https://github.com/NVIDIA/cutlass/tree/main/examples/82_blackwell_distributed_gemm/82_blackwell_distributed_gemm.cu) for SM100 Blackwell architecture.
|
||||
* Enhancement and new support of block-wise and group-wise GEMM for Hopper and Blackwell architectures:
|
||||
- Enhancement of [blockwise GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling.cu) for Hopper architecture.
|
||||
- Enhancement of [groupwise GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_groupwise_scaling.cu) for Hopper architecture.
|
||||
- Support for [grouped GEMM with blockwise and groupwise scaling](https://github.com/NVIDIA/cutlass/tree/main/examples/68_hopper_fp8_warp_specialized_grouped_gemm_with_blockwise_scaling/) for Hopper architecture.
|
||||
- Support for [groupwise GEMM](https://github.com/NVIDIA/cutlass/tree/main/tools/profiler/src/blockwise_gemm_operation_profiler.cu) in CUTLASS profiler.
|
||||
- Support for [blockwise GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/81_blackwell_gemm_blockwise/81_blackwell_gemm_blockwise.cu) for Blackwell architecture.
|
||||
- Support for [groupwise GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/81_blackwell_gemm_blockwise/81_blackwell_gemm_groupwise.cu) for Blackwell architecture.
|
||||
- Support for [grouped GEMM with blockwise](https://github.com/NVIDIA/cutlass/tree/main/examples/81_blackwell_gemm_blockwise/81_blackwell_grouped_gemm_blockwise.cu) and [groupwise scaling](https://github.com/NVIDIA/cutlass/tree/main/examples/81_blackwell_gemm_blockwise/81_blackwell_grouped_gemm_groupwise.cu) for Blackwell architecture.
|
||||
* Added support for enhanced kernel performance search (auto-tuning) in CUTLASS profiler:
|
||||
- Sorting performance results by GFLOPs/second: Users can now sort the final performance report based on GFLOPs/second, making it easier to identify the most efficient kernels.
|
||||
- Exhaustive search for best kernel performance in GFLOPs/second: The profiler now searches for the best-performing kernel across a range of problem sizes, swizzle sizes, rasterization orders, and dynamic cluster configurations to maximize performance.
|
||||
- Performance search under a fixed GEMM shape: Enables exhaustive tuning within a fixed GEMM shape, exploring various kernel parameters to find the best configuration.
|
||||
- More detailed introductions and examples to leverage this feature can be found in [profiler.md](https://docs.nvidia.com/cutlass/media/docs/cpp/profiler.html#exhaustive-search-mode-and-top-k-output-ranking-according-to-performance-in-gflopss).
|
||||
* Support `void` as the D element in sm100 kernel epilogues.
|
||||
* Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs!
|
||||
* Optimal code generation with CUDA toolkit versions 12.8U1.
|
||||
|
||||
## [3.8.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.8.0) (2025-01-25)
|
||||
|
||||
* Support for new CuTe building blocks specifically for Blackwell SM100 architecture:
|
||||
- [5th generation Blackwell Tensor Core instructions (TCGen05)](https://github.com/NVIDIA/cutlass/tree/main/include/cute/atom/mma_traits_sm100.hpp) via CuTe MMA atoms.
|
||||
- Extensions to [Tensor Memory Accelerator](https://github.com/NVIDIA/cutlass/tree/main/include/cute/atom/copy_traits_sm100_tma.hpp) via CuTe Copy atoms.
|
||||
- Exposure of Blackwell's new tensor memory (note: distinct from TMA) as [`tmem`](https://github.com/NVIDIA/cutlass/tree/main/include/cute/pointer.hpp) across CuTe as a first class data locale.
|
||||
- Exposure of [`tmem->rmem`, `rmem->tmem` and `smem->tmem` data movement instructions](https://github.com/NVIDIA/cutlass/tree/main/include/cute/atom/copy_traits_sm100.hpp) as copy atoms in CuTe.
|
||||
- [`make_tmem_copy()`](https://github.com/NVIDIA/cutlass/tree/main/include/cute/atom/copy_traits_sm100.hpp) utility method to ease creation of tiled copies for tmem copy atoms.
|
||||
- Support for [new variants of LDSM on Blackwell](https://github.com/NVIDIA/cutlass/tree/main/include/cute/atom/copy_traits_sm100.hpp) via CuTe Copy atoms.
|
||||
* Support for new CUTLASS building blocks specifically for Blackwell SM100 architecture:
|
||||
- Various narrow precision [FP4, FP6, and FP8](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/exmy_base.h) formats as well as their [block-scaled variants NVFP4, MXFP4, MXFP6, and MXFP8](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/float_subbyte.h)
|
||||
- [Pipelines that implement Blackwell specific synchronization](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/pipeline/sm100_pipeline.hpp).
|
||||
- [Cluster launch control API supporting preferred and fallback cluster shapes](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/cluster_launch.hpp).
|
||||
- Data types including NVFP4, MXFP4, MXFP6, and MXFP8 and all their supported element and scale factor types.
|
||||
- Tile schedulers using [Blackwell's Cluster Launch Control (CLC) feature](https://docs.nvidia.com/cutlass/media/docs/cpp/blackwell_cluster_launch_control.html) to implement dynamic persistence scheduling for [GEMMs](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_tile_scheduler.hpp), and [stream-K](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_tile_scheduler_stream_k.hpp).
|
||||
- Extensions to testbeds and reference check code for unit tests and CUTLASS profiler.
|
||||
* Full support for Blackwell SM100 kernels in CUTLASS 3.x API:
|
||||
- [Blackwell specific kernel layers](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_gemm_tma_warpspecialized.hpp) that
|
||||
+ Implement a new warp-specialization recipe tuned specifically for Blackwell SM100 architecture.
|
||||
+ Leverage all the new features such as CLC based tile scheduling, preferred cluster, and TMEM based double buffering of accumulators.
|
||||
+ Support stream-K load balancing for all kernel types everywhere via composable scheduler support.
|
||||
- Blackwell collective mainloops that target the TCGen05 MMA instructions (both SS and TS) for
|
||||
* [Non-block scaled data types without support for pointer array and grouped GEMM with TMA](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_mma_warpspecialized.hpp)
|
||||
* [Non-block scaled data types with support for pointer array and grouped GEMM with TMA](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_mma_array_warpspecialized.hpp)
|
||||
* [Block scaled data types without support for pointer array and grouped GEMM with TMA](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_blockscaled_mma_warpspecialized.hpp)
|
||||
* [Block scaled data types with support for pointer array and grouped GEMM with TMA](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_blockscaled_mma_array_warpspecialized.hpp)
|
||||
- Blackwell [collective mainloop for convolution kernels](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/collective/sm100_implicit_gemm_umma_warpspecialized.hpp) supporting non-block scaled data types for fprop, dgrad, and wgrad.
|
||||
- New [GEMM](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/dispatch_policy.hpp), [convolution](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/dispatch_policy.hpp), and [epilogue](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/dispatch_policy.hpp) dispatch policies for collectives, kernel layers, and builders.
|
||||
- [Blackwell epilogue that supports loading accumulators from `tmem`](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/collective/sm100_epilogue_tma_warpspecialized.hpp) and full set of EVT fusions.
|
||||
* CUTLASS library and profiler integration for block scaled data types for kernel emission, profiling, and verification.
|
||||
- Support for preferred and fallback cluster shapes via profiler command line arguments parsing to set dynamic cluster shapes.
|
||||
- Support for dynamic datatypes via profiler command line argument parsing to set the dynamic datatype in TCGen05 MMA instruction descriptors.
|
||||
- Support for mixed input GEMM kernels on Hopper in the profiler.
|
||||
* New CUTLASS profiler flag `use-cuda-graphs` to reduce overheads when benchmarking launch-bound kernels.
|
||||
* A new 3.x version of grouped GEMM has been added to the CUTLASS library and generates kernels for Hopper and Blackwell. Grouped GEMM support is now enabled in the CUTLASS profiler (`./cutlass_profiler --operation=GroupedGemm --help` for details).
|
||||
* Set of examples that demonstrate the usage of the 3.x API for targeting Blackwell SM100 architecture:
|
||||
- [Basic FP16 and FP8 GEMMs with minimal changes from Hopper examples](https://github.com/NVIDIA/cutlass/tree/main/examples/70_blackwell_gemm/), demonstrating ease of migration for off the shelf kernels using the 3.x collective builder API.
|
||||
- GEMM with [opt-in collective builder schedules showcasing available recipes](https://github.com/NVIDIA/cutlass/tree/main/examples/71_blackwell_gemm_with_collective_builder/71_blackwell_gemm_with_collective_builder.cu) for Blackwell.
|
||||
- Block scaled data type GEMMs targeting Blackwell's native block scaled Tensor Cores:
|
||||
+ [NVFP4 inputs with BF16 output](https://github.com/NVIDIA/cutlass/tree/main/examples/72_blackwell_narrow_precision_gemm/72a_blackwell_nvfp4_bf16_gemm.cu)
|
||||
+ [NVFP4 inputs with NVFP4 output](https://github.com/NVIDIA/cutlass/tree/main/examples/72_blackwell_narrow_precision_gemm/72b_blackwell_nvfp4_nvfp4_gemm.cu)
|
||||
+ [Mixed MXFP8 and MXFP6 inputs with BF16 output](https://github.com/NVIDIA/cutlass/tree/main/examples/72_blackwell_narrow_precision_gemm/72c_blackwell_mixed_mxfp8_bf16_gemm.cu)
|
||||
- GEMM example demonstrating [Blackwell's new preferred cluster support via dynamic cluster shapes](https://github.com/NVIDIA/cutlass/tree/main/examples/73_blackwell_gemm_preferred_cluster/blackwell_gemm_preferred_cluster.cu) for increased occupancy.
|
||||
- [GEMM with CLC based StreamK scheduler for load balancing](https://github.com/NVIDIA/cutlass/tree/main/examples/74_blackwell_gemm_streamk/blackwell_gemm_streamk.cu).
|
||||
- Grouped GEMM for [vanilla FP8 data inputs](https://github.com/NVIDIA/cutlass/tree/main/examples/75_blackwell_grouped_gemm/75_blackwell_grouped_gemm.cu) and [NVFP4 block scaled inputs](https://github.com/NVIDIA/cutlass/tree/main/examples/75_blackwell_grouped_gemm/75_blackwell_grouped_gemm_block_scaled.cu).
|
||||
- Convolution kernels for [fprop](https://github.com/NVIDIA/cutlass/tree/main/examples/76_blackwell_conv/76_blackwell_conv_fprop.cu), [dgrad](https://github.com/NVIDIA/cutlass/tree/main/examples/76_blackwell_conv/76_blackwell_conv_dgrad.cu), and [wgrad](https://github.com/NVIDIA/cutlass/tree/main/examples/76_blackwell_conv/76_blackwell_conv_wgrad.cu).
|
||||
- [Fused multi-head attention fprop kernel](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/77_blackwell_fmha.cu) supporting fp16/bf16/fp8 data types across head dims of 32, 64, and 128.
|
||||
- A new BF16x9 GEMM [kernel](https://github.com/NVIDIA/cutlass/tree/main/examples/78_blackwell_emulated_bf16x9_gemm/78_blackwell_emulated_bf16x9_gemm.cu) that emulates FP32 GEMM (SGEMM) using BF16 operations.
|
||||
* Set of examples that demonstrate the usage of the 3.x API for targeting Hopper architecture:
|
||||
- A set of new [Hopper grouped GEMM kernels](https://github.com/NVIDIA/cutlass/tree/main/examples/69_hopper_mixed_dtype_grouped_gemm/) that support mixed A and B datatypes.
|
||||
- A new [Hopper FP8 GEMM with groupwise scaling](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_groupwise_scaling.cu).
|
||||
* Documentation updates:
|
||||
- [Quickstart - instantiating a Blackwell block-scaled GEMM](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html#instantiating-a-blackwell-sm100-gemm-kernel).
|
||||
- Detailed [Blackwell block-scaled GEMM functionality documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/blackwell_functionality.html)
|
||||
- A new [functionality documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/functionality.html) specifically for 3.x API comprehensively documenting all supported kernel types, data types, kernel features, minimum CUDA toolkit support, etc. for 3.x supported architectures.
|
||||
- Updates to [compatibility](https://docs.nvidia.com/cutlass/overview.html#compatibility) section regarding supported compilers, operating systems, CUDA Toolkits, Hardware Architectures, and [Target Architecture](https://docs.nvidia.com/cutlass/overview.html#target-architecture).
|
||||
- Updates to [profiler documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/profiler.html) for testing mixed input GEMM kernels on Hopper.
|
||||
|
||||
## [3.7.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.7.0) (2025-01-11)
|
||||
- [Hopper blockwise scaling FP8 GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling.cu) uses 2D scaling tensor, assigning one value per threadblock. This allows a finer-grained scaling to be applied for each output tile per gemm-k iteration. The operands and scaling tensors are loaded from global memory to shared memory using TMA and cp_async, respectively. The scaling is applied inside the mainloop. Details with figures are [here](https://github.com/NVIDIA/cutlass/pull/1932#issue-2645398439).
|
||||
- [Distributed GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/65_distributed_gemm/65_distributed_gemm.cu) is a new (experimental) API which can turn existing CUTLASS GEMM kernels into pipelined Tensor Parallel GEMMs that run efficiently on NVLink-based network of GPUs. Its pipelining schedules can hide most of the communication behind computation, and relies on point-to-point communication, which can simply use CUDA runtime's peer device access feature. It also utilizes remote TMA loads and memcopies with CUDA graphs to handle communication primarily through the Copy Engine, leaving all SMs free for Hopper's persistent kernels. For more details you can refer to the [DistGEMM blog post](https://blog.shi-labs.com/distributed-gemm-88be6a481e2b).
|
||||
- Improved persistent grid launch for Hopper kernels with large cluster sizes (>= size of 4) using the new `make_kernel_hardware_info` API as shown in [example 48](https://github.com/NVIDIA/cutlass/tree/main/examples/48_hopper_warp_specialized_gemm/48_hopper_warp_specialized_gemm.cu).
|
||||
- Enabled high precision accumulation for Hopper FP8 Sparse GEMM.
|
||||
- Potential API breaking changes:
|
||||
+ Fix `cute::UniversalCopy` for type safety.
|
||||
+ No longer implicitly select `cute::SM80_CP_ASYNC_*` based on input tensors. This avoids implicit downstream synchronization requirements. To use `SM80_CP_ASYNC`, users must explicitly select the appropriate CopyAtom.
|
||||
+ Fix `cute::SM80_CP_ASYNC_CACHEALWAYS`, `cute::SM80_CP_ASYNC_CACHEGLOBAL`, `cute::SM80_CP_ASYNC_CACHEALWAYS_ZFILL`, `cute::SM80_CP_ASYNC_CACHEGLOBAL_ZFILL` to avoid implicitly selecting `ZFILL` behavior on predication.
|
||||
+ Remove `cute::copy_vec<T>` in favor of `cute::copy_aligned` and `cute::copy(AutoVectorizingCopyWithAssumedAlignment<NumBits>,...)`.
|
||||
+ A refactor of default epilogue struct `DefaultEpilogue` [API](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/collective/default_epilogue.hpp) to avoid reading non-void `ElementC` value for `ElementC = void` kernel.
|
||||
- New CUTLASS profiler flags: `profiling-duration`, `min-iterations`, and `kernels-file` documented in [profiler.md](https://docs.nvidia.com/cutlass/media/docs/cpp/profiler.html#cutlass-profiler).
|
||||
- Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs!
|
||||
- Optimal code generation with CUDA toolkit versions 12.6.
|
||||
|
||||
## [3.6.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.6.0) (2024-10-03)
|
||||
|
||||
- [Hopper structured sparse GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/62_hopper_sparse_gemm/62_hopper_sparse_gemm.cu).
|
||||
+ [FP16](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_sparse_gemm_f16_f16_f32_tensor_op_f32.cu)
|
||||
+ [FP8](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_sparse_gemm_f8_f8_f32_tensor_op_f32.cu)
|
||||
+ [INT8](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_sparse_gemm_s8_s8_s32_tensor_op_s32.cu)
|
||||
+ [TF32](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_sparse_gemm_tf32_tf32_f32_tensor_op_f32.cu)
|
||||
- A refactor to the CUTLASS 3.x convolution `kernel::ConvUniversal` [API](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/kernel/sm90_implicit_gemm_tma_warpspecialized.hpp) to bring it in line with `gemm::GemmUniversal`. Now the 3.x convolution API is no longer considered as a beta API.
|
||||
- [An improved mixed input GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/55_hopper_mixed_dtype_gemm/README.md) and a [lookup table implementation](https://github.com/NVIDIA/cutlass/tree/main/examples/55_hopper_mixed_dtype_gemm/55_hopper_int4_fp8_gemm.cu) for `INT4`x`FP8` scale-only mode.
|
||||
- [EVT nodes for Top-K selection and softmax](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/fusion/sm90_visitor_topk_softmax.hpp) and [GEMM example using those](https://github.com/NVIDIA/cutlass/tree/main/examples/61_hopper_gemm_with_topk_and_softmax/61_hopper_gemm_with_topk_and_softmax.cu).
|
||||
- [Programmatic Dependent Launch](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/arch/grid_dependency_control.h) (PDL) that leverages a new Hopper feature to speed up two back-to-back kernels, and its corresponding [documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/dependent_kernel_launch.html).
|
||||
- [A new debugging tool, synclog](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/arch/synclog.hpp), for dumping out all synchronization events from within a kernel to a file. Please see [synclog documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/utilities.html#debugging-asynchronous-kernels-with-cutlasss-built-in-synclog-tool) for details.
|
||||
- A new TMA-enabled [epilogue](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/collective/sm90_epilogue_array_tma_warpspecialized.hpp) for grouped GEMM that brings significant performance improvement, as well as its EVT support.
|
||||
- A SIMT-enabled pointer-array [epilogue](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/collective/sm70_epilogue_vectorized_array.hpp).
|
||||
- A new [Ping-Pong kernel schedule for Grouped GEMM](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_gemm_array_tma_warpspecialized_pingpong.hpp) and some other optimizations.
|
||||
- [A new instantiation strategy for CUTLASS profiler kernels](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/sm90_shapes.py) along with [improved documentation for instantiation level in CUTLASS profiler](https://docs.nvidia.com/cutlass/media/docs/cpp/profiler.html#instantiating-more-kernels-with-hopper).
|
||||
- A new hardware support for comparisons and computations of [`cutlass::bfloat16_t`](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/bfloat16.h)
|
||||
- Fixed use of isnan on Windows for [`half_t`](https://github.com/NVIDIA/cutlass/tree/main/test/unit/core/functional.cu).
|
||||
- Various improvements and fixes from the community and CUTLASS team. Thanks to everyone who submitted PRs!
|
||||
- Optimal code generation with CUDA toolkit versions 12.6.
|
||||
|
||||
## [3.5.1](https://github.com/NVIDIA/cutlass/releases/tag/v3.5.1) (2024-07-25)
|
||||
|
||||
- [Minimal SM90 WGMMA + TMA GEMM example in 100 lines of code](https://github.com/NVIDIA/cutlass/tree/main/examples/cute/tutorial/wgmma_sm90.cu)
|
||||
- [Exposure of L2 `cache_hint`s in TMA copy atoms](https://github.com/NVIDIA/cutlass/tree/main/include/cute/arch/copy_sm90_tma.hpp#L48)
|
||||
- Exposure of raster order and tile swizzle extent in [CUTLASS library profiler](./media/docs/cpp/profiler.md#gemm), and
|
||||
[example 48](https://github.com/NVIDIA/cutlass/tree/main/examples/48_hopper_warp_specialized_gemm/48_hopper_warp_specialized_gemm.cu).
|
||||
- [TMA store based and EVT supported epilogues](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/collective/sm90_epilogue_array_tma_warpspecialized.hpp) for [Hopper pointer array batched kernels](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_ptr_array.cu).
|
||||
- A new [`GemmSparseUniversal` API for CUTLASS 2.x Ampere kernels](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/device/gemm_sparse_universal.h) to enable serial and parallel split-k for sparse tensor cores and new tiny tile sizes to better support LLM inference:
|
||||
+ [FP16 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f16t_f16n_f32t_tensor_op_f32_sparse_sm80.cu#L269-L393) and [NT](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f16n_f16t_f32t_tensor_op_f32_sparse_sm80.cu#L269-L411).
|
||||
+ [int8 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_s8t_s8n_s32t_tensor_op_s32_sparse_sm80.cu#L264-L452).
|
||||
+ [int4 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_s4t_s4n_s32t_tensor_op_s32_sparse_sm80.cu#L264-L452).
|
||||
+ [FP32 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f32t_f32n_f32t_tensor_op_f32_sparse_sm80.cu#L427-L642) and [NT](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f32n_f32t_f32t_tensor_op_f32_sparse_sm80.cu#L427-L456).
|
||||
- [CUDA host adapter](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/cuda_host_adapter.hpp) extensions to support TMA descriptor construction driver APIs.
|
||||
- Inclusion of more [Hopper fprop, dgrad, and wgrad convolution kernels in CUTLASS library and profiler](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/generator.py).
|
||||
- Support for residual add (beta != 0) in convolution kernels.
|
||||
- A new convolution [epilogue](https://github.com/NVIDIA/cutlass/tree/main/examples/16_ampere_tensorop_conv2dfprop/ampere_tensorop_conv2dfprop.cu#L269) for CUTLASS 2.x to support non-packed NHWC output.
|
||||
- A refactor of [include files throughout CUTLASS core directories](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/collective_mma_decl.hpp) to reduce circular dependencies and [tests to guard against them](https://github.com/NVIDIA/cutlass/tree/main/test/self_contained_includes/CMakeLists.txt).
|
||||
- [A guide for setting up VSCode to work well with CUTLASS](https://docs.nvidia.com/cutlass/media/docs/cpp/ide_setup.html) and [expanded code style guide](https://docs.nvidia.com/cutlass/media/docs/cpp/programming_guidelines.html).
|
||||
- Better support for MSVC as a host compiler.
|
||||
- Many performance optimizations, improvements, and bug fixes including fixes for FlashAttention-2.
|
||||
- Optimal code generation with CUDA toolkit versions 12.4 and 12.5u1.
|
||||
|
||||
## [3.5.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.5.0) (2024-04-09)
|
||||
|
||||
- Implicit GEMM Convolutions targeting Hopper SM90A via WGMMA + [TMA im2col](https://github.com/NVIDIA/cutlass/tree/main/include/cute/atom/copy_traits_sm90_im2col.hpp)
|
||||
+ Native implementation in CUTLASS 3.x using CuTe, mirroring the [same design hierarchy as that of GEMMs](https://docs.nvidia.com/cutlass/media/docs/cpp/gemm_api_3x.html).
|
||||
+ Support for 1D, 2D, and 3D convolutions in a [rank-agnostic fashion](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/convnd_problem_shape.hpp).
|
||||
+ Support for [Fprop](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device_3x/fprop/sm90_conv3d_fprop_implicit_gemm_s8_s8_s32_tensorop_s32.cu), [Dgrad](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device_3x/dgrad/sm90_conv2d_dgrad_implicit_gemm_f16_f16_f32_tensorop_f16.cu), and [Wgrad](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device_3x/wgrad/sm90_conv1d_wgrad_implicit_gemm_f16_f16_f32_tensorop_f16.cu) algorithms
|
||||
+ [CUTLASS profiler support](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/conv3x_emitter.py) for 2D and 3D convolutions implemented via the 3.x API.
|
||||
+ NOTE: this is a beta release. Further updates to CUTLASS will include major performance improvements, feature enablement, and possible breaking changes to the API until 3.7 release. Your feedback is welcome on the design!
|
||||
- Support for [Ada (SM89) FP8 tensor cores via the 2.x API](https://github.com/NVIDIA/cutlass/tree/main/examples/58_ada_fp8_gemm/ada_fp8_gemm.cu). Requires CUDA 12.4 or newer.
|
||||
- [Ampere gather/scatter convolution example](https://github.com/NVIDIA/cutlass/tree/main/examples/59_ampere_gather_scatter_conv/README.md) in CuTe and CUTLASS 3.x
|
||||
+ Showcasing how custom kernels can be written and optimized using CUTLASS 3.x and CuTe and the general strategy for implementing convolutions as specializations of GETTs.
|
||||
+ Implementation of a coarse grained sparse gather/scatter kernel achieving peak performance on Ampere class tensor cores.
|
||||
- 32x and 16x tile sizes are added to CUTLASS 2.x to improve the performance of narrow-tall and wide-short matrices.
|
||||
+ [Ampere FP16 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f16t_f16n_f16t_tensor_op_f32_sm80.cu) and [NT](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f16n_f16t_f16t_tensor_op_f32_sm80.cu#L227-L301), [Ampere INT8 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_s8t_s8n_s8t_tensor_op_s32_sm80.cu#L392-L1342), [Ampere INT4 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_s4t_s4n_s4t_tensor_op_s32_sm80.cu#L372-L934).
|
||||
+ [Turing FP16 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f16t_f16n_f16t_tensor_op_f32_sm75.cu#L55-L394), [Turing INT8 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_s8t_s8n_s8t_tensor_op_s32_sm75.cu#L166-L537), [Turing INT4 TN](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_s4t_s4n_s4t_tensor_op_s32_sm75.cu#L310-L564).
|
||||
- Updates to CuTe documentation for [`cute::Tensor<>`](./media/docs/cpp/cute/03_tensor.md), [MMA atoms](./media/docs/cpp/cute/0t_mma_atom.md), and an overhauled [CuTe GEMM tutorial series](https://github.com/NVIDIA/cutlass/tree/main/examples/cute/tutorial).
|
||||
- Extensions to CuTe to support [L2 prefetching](https://github.com/NVIDIA/cutlass/tree/main/include/cute/algorithm/prefetch.hpp) and [TMA store+reductions](https://github.com/NVIDIA/cutlass/tree/main/include/cute/arch/copy_sm90_tma.hpp#L1337).
|
||||
- Remove C++11 requirement on a few CUTLASS 2.x API header files. All CUTLASS files now require C++17.
|
||||
- Fixes to greatly reduce build warnings.
|
||||
- Updates and bugfixes from the community (thanks!)
|
||||
|
||||
## [3.4.1](https://github.com/NVIDIA/cutlass/releases/tag/v3.4.1) (2024-02-14)
|
||||
|
||||
- Statically available [CUTLASS Version macros](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/version.h) that allow for handling API changes between CUTLASS releases on the users' side.
|
||||
- Improvements for Hopper [Group-GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/57_hopper_grouped_gemm) and [Pointer-Array Batched GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/56_hopper_ptr_array_batched_gemm).
|
||||
- Updates and bugfixes from the community (thanks!).
|
||||
|
||||
## [3.4.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.4.0) (2024-01-12)
|
||||
* Expanded [Mixed-input Hopper GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/55_hopper_mixed_dtype_gemm) support covering {16-bit, 8-bit} x {8-bit, 4-bit} input types with fast numerical converters and group scaling factors.
|
||||
* Performance improvements to [Mixed-input Hopper GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/55_hopper_mixed_dtype_gemm)
|
||||
* Beta release of [Pointer-Array Batched GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/56_hopper_ptr_array_batched_gemm) now available on Hopper GPUs utilizing TMA and WGMMA (requires CUDA 12.3 or above).
|
||||
* Beta release of [Group-GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/57_hopper_grouped_gemm) utilizing TMA and WGMMA (requires CUDA 12.3 or above).
|
||||
* [Ampere Sparse GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/15_ampere_sparse_tensorop_gemm/ampere_sparse_tensorop_gemm_with_visitor.cu) supports Epilogue Visitor Tree (EVT) now.
|
||||
* NamedBarriers usability improvement and list of [ReservedNamedBarriers](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/arch/barrier.h) has been officially released.
|
||||
* Improved CuTe documentation including improved clarity and depth of [Quickstart](./media/docs/cpp/cute/00_quickstart.md), [CuTe Layout](./media/docs/cpp/cute/01_layout.md), and [CuTe Layout Algebra](./media/docs/cpp/cute/02_layout_algebra.md). Associated code comments, post-conditions, and details in [CuTe Core Unit Tests](./test/unit/cute/core/) also improved.
|
||||
|
||||
## [3.3](https://github.com/NVIDIA/cutlass/releases/tag/v3.3.0) (2023-10-31)
|
||||
* [Mixed-input Hopper GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/55_hopper_mixed_dtype_gemm) support covering 16-bit x 8-bit input operand types.
|
||||
* [Mixed-input Ampere GEMMs](https://github.com/NVIDIA/cutlass/pull/1084) with support for canonical layouts (TN). The implementation supports upcast on operandB {fp16, bf16} x {s8, u8}, and upcast on operandA {s8, u8} x {fp16, bf16}.
|
||||
* [Copy Async based Hopper GEMMs](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_gemm_bf16_bf16_bf16_alignx_tensor_op_f32_warpspecialized_cooperative.cu) - which support lower than 16B aligned input tensors.
|
||||
* Kernel schedules and Builder support for mixed precision and Copy Async GEMMs with < 16B aligned input tensors.
|
||||
* Profiler support for lower-aligned Hopper GEMMs.
|
||||
* Performance Improvements to [Scatter-Gather Hopper Example](https://github.com/NVIDIA/cutlass/tree/main/examples/52_hopper_gather_scatter_fusion).
|
||||
* Sub-Byte type fixes and improvements.
|
||||
* EVT Support for RELU with Aux bitmap tensor store (used in dRELU). See [SM90 EVT fusions](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/fusion/sm90_visitor_compute_tma_warpspecialized.hpp) for details.
|
||||
* Fusion support for backprop fusions including drelu, dgelu, and dbias.
|
||||
* Support for void-C kernels and SM80 mixed-input GEMMs in the CUTLASS Python interface
|
||||
|
||||
## [3.2.2](https://github.com/NVIDIA/cutlass/releases/tag/v3.2.2) (2023-10-25)
|
||||
* Minor patch for issue/1138
|
||||
|
||||
## [3.2.1](https://github.com/NVIDIA/cutlass/releases/tag/v3.2.1) (2023-09-22)
|
||||
* Python support SM90 Epilogue Visitor Tree (EVT) on top of the C++ support released in 3.2.0.
|
||||
* SM80 EVT support in C++ and Python.
|
||||
* Other SM90 epilogue improvements.
|
||||
* Splitting CUTLASS library into smaller units based on operation, arch and datatypes. See [1105](https://github.com/NVIDIA/cutlass/discussions/1105) for details.
|
||||
* Making `tools/library/scripts` packageable - `tools/library/scripts` is now moving to `python/cutlass_library`. See the Python [README](https://github.com/NVIDIA/cutlass/tree/main/python/README.md) for details.
|
||||
* SM90 TF32 kernel improvements for all layouts.
|
||||
* SM90 rasterization direction support in the CUTLASS profiler.
|
||||
* Improvement for CUTLASS profiler build times.
|
||||
* Remove Python-C++ bindings.
|
||||
|
||||
## [3.2.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.2.0) (2023-08-03)
|
||||
|
||||
* New warp-specialized persistent FP8 GEMM kernel [kernel schedules](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_cooperative.hpp) and [mainloops](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8.hpp) targeting Hopper architecture that achieve great performance with TMA, WGMMA, and threadblock clusters. An example showcasing [Hopper warp-specialized FP8 GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/54_hopper_fp8_warp_specialized_gemm). FP8 GEMMs come with a fast accumulation mode. When enabled, problem execution might be faster but at the cost of lower accuracy because intermediate results will not periodically be promoted to a higher precision.
|
||||
* New [Epilogue Visitor Tree (EVT)](https://github.com/NVIDIA/cutlass/tree/main/examples/49_hopper_gemm_with_collective_builder/49_collective_builder.cu) support for Hopper TMA epilogues. EVTs allow for user-defined customized epilogue fusion patterns without having to write a new epilogue.
|
||||
* [Stream-K](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_tile_scheduler_stream_k.hpp) feature for Hopper. Note that this is only a functional implementation of stream-K, and should not be used for performance comparison. Optimizations are expected in a future release.
|
||||
* Improved CTA rasterization and support for CTA swizzling for Hopper kernels using the [Tile Scheduler](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_tile_scheduler.hpp).
|
||||
* Improved performance for [warp-specialized TensorFloat-32 (TF32) GEMM kernels](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_gemm_tf32_tf32_f32_tensor_op_f32_gmma_rs_cluster_warpspecialized.cu) targeting Hopper TMA.
|
||||
* [Hopper GEMM+Permute](https://github.com/NVIDIA/cutlass/tree/main/examples/53_hopper_gemm_permute/53_hopper_gemm_permute.cu), an example of fusing tensor reordering (permutation) with GEMM mainloop or epilogue.
|
||||
* New CUTLASS 2D Convolution Python interface. New [example](https://github.com/NVIDIA/cutlass/tree/main/examples/python/03_basic_conv2d.ipynb) here.
|
||||
* Support for Windows (MSVC) builds. Tested with Visual Studio 2019 v16.11.27 on Windows 10.0.
|
||||
* Optimal performance using [**CUDA 12.2u1**](https://developer.nvidia.com/cuda-downloads)
|
||||
* Updates and bugfixes from the community (thanks!)
|
||||
|
||||
## [3.1.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.1.0) (2023-04-14)
|
||||
* New CUTLASS Python interface that aims to provide an ease-of-use interface for instantiating, emitting, compiling, and running CUTLASS kernels via Python. More details [here](https://github.com/NVIDIA/cutlass/tree/main/python/README.md) and new [examples](https://github.com/NVIDIA/cutlass/tree/main/examples/python).
|
||||
* New [efficient epilogues](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_cooperative.cu#L783) using TMA for Hopper.
|
||||
* Support for [fused epilogues](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_cooperative_bias_elementwise.cu), such as Bias, ReLU and GELU, using the new efficient epilogues.
|
||||
* New [warp-specialized TensorFloat-32 (TF32) GEMM kernels](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm90_gemm_tf32_tf32_f32_tensor_op_f32_gmma_rs_cluster_warpspecialized.cu) targeting Hopper TMA.
|
||||
* New [*warp-specialized persistent cooperative*](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_cooperative.hpp) kernel design that allows for larger tile sizes and improves performance on Hopper.
|
||||
* An [example](https://github.com/NVIDIA/cutlass/tree/main/examples/51_hopper_gett) showcasing GEMM-Like Tensor-Tensor Contraction (GETT) capability on Hopper.
|
||||
* Epilogue builders. Similar to mainloop builders (see [example 49](https://github.com/NVIDIA/cutlass/tree/main/examples/49_hopper_gemm_with_collective_builder/49_collective_builder.cu)), epilogue builders aim to generate the best-possible epilogue while exposing incremental opt-ins for greater customization.
|
||||
* Profiler support for overriding kernel and epilogue builder auto schedules for 3.x API kernels, allowing specific policies to be run in the CUTLASS profiler.
|
||||
* Performance optimizations for the [*warp-specialized persistent ping-pong*](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp) kernel.
|
||||
* Changes to the [GEMM API 3.x](./media/docs/cpp/gemm_api_3x.md), involving the host-facing arguments and the underlying `Params` structs.
|
||||
* [FMHA Backward Pass](https://github.com/NVIDIA/cutlass/tree/main/examples/41_fused_multi_head_attention/fused_multi_head_attention_backward.cu) from Meta xFormers.
|
||||
* [Streamk GEMM with Broadcast](https://github.com/NVIDIA/cutlass/tree/main/examples/47_ampere_gemm_universal_streamk/ampere_gemm_universal_streamk_broadcast.cu) enables epilogue broadcast with StreamK GEMM.
|
||||
* [Batched B2B GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/13_two_tensor_op_fusion) now can run multiple Back-to-Back GEMM with the same problem size in parallel.
|
||||
* [Batched Strided GEMV](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemv.cu) supports both row-major and column-major input matrices.
|
||||
* [Permute + GEMM fusion](https://github.com/NVIDIA/cutlass/tree/main/examples/39_gemm_permute) can now fuse Permute with the following GEMM. Previously, only fusing GEMM with Permute in the epilogue was supported.
|
||||
* [Row Broadcast](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/threadblock/predicated_tile_iterator_row_broadcast.h) can be fused in the epilogue.
|
||||
* The GitHub branch is renamed from `master` to `main` in this release.
|
||||
* Optimal performance using [**CUDA 12.1**](https://developer.nvidia.com/cuda-downloads)
|
||||
* Updates and bugfixes from the community (thanks!)
|
||||
|
||||
## [3.0.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.0.0) (2023-01-23)
|
||||
* [CuTe](./media/docs/cpp/cute/00_quickstart.md), a [new core library and backend](./include/cute) for CUTLASS 3.0 that defines a single Layout vocabulary type and an associated algebra of layouts for a much more expressive and composable abstraction for tensors, sets of parallel agents, and operations by said agents on tensors.
|
||||
* [A new conceptual operation hierarchy](./media/docs/cpp/cutlass_3x_design.md) that replaces the architecture-centric hierarchy of CUTLASS 2.x and [documentation for CUTLASS 3.0's GEMM API changes](./media/docs/cpp/gemm_api_3x.md).
|
||||
* Strict API backwards compatibility that exposes both 2.x and 3.x API kernels through the same [`device::GemmUniversalAdapter`](./include/cutlass/gemm/device/gemm_universal_adapter.h) and [`kernel::GemmUniversal`](./include/cutlass/gemm/kernel/gemm_universal.hpp) types, allowing users to include both APIs in the same translation units. More information can be found in the [3.x backwards compatibility section](./media/docs/cpp/cutlass_3x_backwards_compatibility.md).
|
||||
* Updates to [Functionality](./media/docs/cpp/functionality.md) which directs users on which kernels are supported via CUTLASS-2 and CUTLASS-3.
|
||||
* Updates to [Compatibility](./README.md#compatibility) Section regarding supported compilers, operating systems, CUDA Toolkits, Hardware Architectures and [Target Architecture](./README.md#target-architecture).
|
||||
* New warp-specialized GEMM [kernel schedules](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized.hpp) and [mainloops](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized.hpp) targeting Hopper architecture that achieve great performance with TMA, WGMMA, and threadblock clusters.
|
||||
* Extensions to CUTLASS profiler to support threadblock cluster shapes in library and profiler tile configurations.
|
||||
* [CUTLASS library integration](https://github.com/NVIDIA/cutlass/tree/main/tools/library/src/gemm_operation_3x.hpp) for 3.x API kernels built through the new `CollectiveBuilder` API, enabling CUTLASS profiler.
|
||||
* Support for [Hopper GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/48_hopper_warp_specialized_gemm) through the new 3.0 API with CuTe-based exposure of the Hopper [Tensor Memory Accelerator](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor) and [WGMMA Tensor Core](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions) features.
|
||||
* Set of examples that demonstrate the usage of the new 3.0 API to easily build GEMM kernels targeting Hopper: examples [48](https://github.com/NVIDIA/cutlass/tree/main/examples/48_hopper_warp_specialized_gemm), [49](https://github.com/NVIDIA/cutlass/tree/main/examples/49_hopper_gemm_schedules_with_collective_builder), and [50](https://github.com/NVIDIA/cutlass/tree/main/examples/50_hopper_gemm_with_epilogue_swizzle).
|
||||
|
||||
# CUTLASS 2.x
|
||||
|
||||
## [2.11.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.11.0) (2022-11-19)
|
||||
* [Stream-K](https://github.com/NVIDIA/cutlass/tree/main/examples/47_ampere_gemm_universal_streamk), which is a new general way to do split-K. It can not only improve performance, but can also significantly reduce the number of tile sizes that need to be profiled to find the best one.
|
||||
* [Fused multi-head attention Kernel](https://github.com/NVIDIA/cutlass/tree/main/examples/41_fused_multi_head_attention). It has two variants: one uses batched GEMM for the fixed sequence length, and the other one uses group GEMM for the variable sequence length. Both versions just need one kernel.
|
||||
* [Dual GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/45_dual_gemm), which can fuse A x B and A x C into one kernel. The two GEMMs have no producer-consumer dependency.
|
||||
* Hopper improves [double precision matrix multiplication](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f64n_f64t_f64t_tensor_op_f64_sm90.cu) by 2x compared to Ampere at iso-clocks. It is supported since CUDA 11.8.
|
||||
* [BLAS3](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/hemm_cf64_cf64_cf64_tensor_op_f64_sm90.cu) functions with Hopper's new double precision matrix multiplication instructions.
|
||||
* [ELL Block Sparse GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/43_ell_block_sparse_gemm), which uses an [ELL matrix](https://developer.nvidia.com/blog/accelerating-matrix-multiplication-with-block-sparse-format-and-nvidia-tensor-cores/) to describe the sparsity of the A matrix. The B and output matrices are still dense. The block size can be arbitrary.
|
||||
* Optimized [Group Conv](https://github.com/NVIDIA/cutlass/tree/main/examples/42_ampere_tensorop_group_conv) for SingleGroup mode, which requires that the output channel per group is a multiple of Threadblock tile N.
|
||||
* [Optimized DepthWise Conv](https://github.com/NVIDIA/cutlass/tree/main/examples/46_depthwise_simt_conv2dfprop/depthwise_simt_conv2dfprop.cu). Two new modes are added
|
||||
* [kOptimized](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/depthwise_conv2d_fprop_direct_conv_f16nhwc_f16nhwc_f16nhwc_simt_f16_sm60.cu) - use direct conv to compute instead of implicit GEMM.
|
||||
* The restrictions are: 1) input, output channel and group number should be a multiple of (128 / sizeof(input element)). 2) The input filter size should be the same as the template parameter configuration.
|
||||
* [kFixedStrideDilation](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/depthwise_conv2d_fprop_direct_conv_fixed_stride_dilation_f16nhwc_f16nhwc_f16nhwc_simt_f16_sm60.cu) - which puts stride and dilation into templates to further improve the performance. In this mode, the kernel persists some inputs in registers to squeeze out more performance, so large filter/stride/dilation is not recommended.
|
||||
* The restrictions are: 1) input, output channel and group number should be a multiple of (128 / sizeof(input element)). 2) input filter size, stride, dilation should be the same as the template parameter configuration.
|
||||
* [Scripts](https://github.com/NVIDIA/cutlass/tree/main/examples/44_multi_gemm_ir_and_codegen) to fuse multiple back-to-back GEMM. Its implementation was discussed in a GTC'22 Spring [talk](https://www.nvidia.com/en-us/on-demand/session/gtcspring22-s41606/).
|
||||
* [FP8 data type definition](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/float8.h) and [conversion routines](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/numeric_conversion.h#L1274-2115).
|
||||
* Updates and bugfixes from the community (thanks!). Big shout out to Meta's [xFormers](https://github.com/facebookresearch/xformers).
|
||||
|
||||
* **Deprecation announcement:** CUTLASS plans to deprecate the following:
|
||||
* Maxwell and Pascal GPU architectures
|
||||
* Ubuntu 16.04
|
||||
* CUDA 10.2
|
||||
|
||||
## [2.10.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.10.0) (2022-08-23)
|
||||
* [CUTLASS Python](https://github.com/NVIDIA/cutlass/tree/main/examples/40_cutlass_py) now supports GEMM, CONV, Group GEMM for different data types as well as different epilogue flavours.
|
||||
* Optimizations for CUTLASS's [Grouped GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/24_gemm_grouped/gemm_grouped.cu) kernel. Threadblock scheduling part is improved. Some computation can be moved to the host side if applicable. [Grouped Syr2k](https://github.com/NVIDIA/cutlass/tree/main/examples/38_syr2k_grouped/syr2k_grouped.cu) kernels are added, too.
|
||||
* Optimizations for [GEMM+Softmax](https://github.com/NVIDIA/cutlass/tree/main/examples/35_gemm_softmax). All the reduction computation is fused into the previous GEMM. More template arguments are provided to fine tune the performance.
|
||||
* [Grouped GEMM for Multihead Attention](https://github.com/NVIDIA/cutlass/tree/main/examples/41_multi_head_attention). This general group gemm based MHA does not require the sequence length of all GEMMs to be the same which makes it most useful for natural language processing.
|
||||
* [GEMM + Layer norm fusion for Ampere](https://github.com/NVIDIA/cutlass/tree/main/examples/37_gemm_layernorm_gemm_fusion/) splits the layernorm into two parts and both of them can be fused into the GEMMs before and after separately. In addition to using square sum to compute the variance of layernorm, [Shift-K](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data) is provided if square sum raises numerical issues.
|
||||
* [GEMM Epilogue Permutation Fusion](https://github.com/NVIDIA/cutlass/tree/main/examples/39_gemm_permute) can apply user provided permutation layout mapping in the GEMM epilogue.
|
||||
* [Grouped convolution targeting implicit GEMM](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/group_conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.cu) introduces the first group convolution implementation to CUTLASS. It is an Analytical implementation, not an Optimized one. The restrictions are: 1) input and output channel number should be a multiple of group number. 2) split-K is not supported. The implementation has 2 modes:
|
||||
* kSingleGroup: output channel per group is multiple of Threadblock tile N.
|
||||
* kMultipleGroup: Threadblock tile N is multiple of output channel per group.
|
||||
* [Depthwise separable convolution](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/depthwise_conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_simt_f16_sm60.cu) introduces the first depthwise convolution which is also Analytical for now. The restrictions are: 1) SIMT only 2) No split-K 3) input channel number, output channel number, and group number must all be equal.
|
||||
* Standalone [Layernorm](https://github.com/NVIDIA/cutlass/tree/main/tools/util/include/cutlass/util/device_layernorm.h) and [Pooling](https://github.com/NVIDIA/cutlass/tree/main/tools/util/include/cutlass/util/device_nhwc_pooling.h) kernels.
|
||||
* [Back-to-back GEMM/CONV](https://github.com/NVIDIA/cutlass/tree/main/examples/13_two_tensor_op_fusion) relaxes the requirement that the first GEMM K dimension needs to be the multiple of Threadblock Tile K dimension.
|
||||
* Optimal performance using [**CUDA 11.6u2**](https://developer.nvidia.com/cuda-downloads)
|
||||
* Updates and bugfixes from the community (thanks!)
|
||||
|
||||
## [2.9.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.9.0) (2022-04-21)
|
||||
|
||||
* [First layer Convolution kernels](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.cu) specialized for small channel counts and reduced alignment
|
||||
* [Few channels](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/threadblock/conv2d_fprop_activation_tile_access_iterator_few_channels.h) specialization for reduced alignment capabilities
|
||||
* [Fixed channels](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/threadblock/conv2d_fprop_activation_tile_access_iterator_fixed_channels.h) further specialized when channel count perfectly matches the access vector size
|
||||
* [Unit tests](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.cu)
|
||||
* [Python-based instance emitter](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/generator.py) in the CUTLASS Library and support in the Profiler
|
||||
* [BLAS3](https://docs.nvidia.com/cuda/cublas/index.html#cublas-level-3-function-reference) operators accelerated by Tensor Cores
|
||||
* Supported types: f32, cf32, f64, cf64, tf32x3, complex tf32x3
|
||||
* [HERK](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/her2k_cf32h_cf32n_tensor_op_fast_f32_sm80.cu) with [emitter](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/rank_k_operation.py)
|
||||
* [SYRK](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/syrk_f32n_f32t_tensor_op_fast_f32_sm80.cu) with [emitter](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/rank_k_operation.py)
|
||||
* [SYMM](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/symm_f32n_f32n_tensor_op_fast_f32_ls_sm80.cu) with [emitter](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/symm_operation.py)
|
||||
* [TRMM](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/trmm_f32n_f32t_f32t_tensor_op_fast_f32_ls_sm80.cu) with [emitter](https://github.com/NVIDIA/cutlass/tree/main/python/cutlass_library/trmm_operation.py)
|
||||
* [Unit tests](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/testbed_rank_k_universal.h)
|
||||
* [CUTLASS Python](https://github.com/NVIDIA/cutlass/tree/main/examples/40_cutlass_py) demonstrating JIT compilation of CUTLASS kernels and a Python-based runtime using [CUDA Python](https://developer.nvidia.com/cuda-python)
|
||||
* [Python-based runtime](https://github.com/NVIDIA/cutlass/tree/main/tools/library/scripts/rt.py) interoperable with existing emitters
|
||||
* [GEMM + Softmax example](https://github.com/NVIDIA/cutlass/tree/main/examples/35_gemm_softmax)
|
||||
* [Gather and Scatter Fusion with GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/36_gather_scatter_fusion) can gather inputs and scatters outputs based on indices vectors in the same GEMM kernel.
|
||||
* It can select random rows in a row major matrix.
|
||||
* It can select random columns in a column major matrix.
|
||||
* [Back-to-back GEMM/CONV](https://github.com/NVIDIA/cutlass/tree/main/examples/13_two_tensor_op_fusion) fully supports buffering the first GEMM/CONV results in the shared memory for the latter one to use. It can eliminate register spill when the tile size is big. Additionally, bias vector add is supported in the first GEMM/CONV.
|
||||
* Supported kernels: GEMM and CONV.
|
||||
* Supported types: fp16 and int8.
|
||||
* Supported architectures: Turing and Ampere.
|
||||
* [Transposed Convolution](https://github.com/NVIDIA/cutlass/tree/main/examples/34_transposed_conv2d) (a.k.a Deconvolution) support which reuses Dgrad implementation.
|
||||
* [Utility functions](https://github.com/NVIDIA/cutlass/tree/main/tools/util/include/cutlass/util) that can pad NHWC and convert between NCHW and NHWC.
|
||||
* [Small alignment implicit gemm](https://github.com/NVIDIA/cutlass/issues/242) support for Fprop/Dgrad/Wgrad so that padding is no longer mandated to use tensor cores in these kernels.
|
||||
* Epilogue enhancement:
|
||||
* Eliminate bank conflicts in int8 tensor core kernels.
|
||||
* Half2 usage if epilogue compute type is fp16.
|
||||
* More activation functions: Silu, Hardswish, Leaky Relu.
|
||||
* New elementwise fusion pattern for [residual block](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/thread/linear_combination_residual_block.h).
|
||||
* [Group GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/24_gemm_grouped) thread block number calculation fix which helps to launch the intended number of threadblocks to fully occupy the GPUs.
|
||||
* [Parallel GEMM splitk](https://github.com/NVIDIA/cutlass/pull/277) support in the CUTLASS profiler.
|
||||
* Optimal performance using [**CUDA 11.6u2**](https://developer.nvidia.com/cuda-downloads)
|
||||
* Updates and bugfixes from the community (thanks!)
|
||||
|
||||
|
||||
## [2.8.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.8.0) (2021-11-19)
|
||||
|
||||
* **TF32x3:** emulated single-precision using Tensor Cores
|
||||
* 45+ TFLOPs on NVIDIA A100
|
||||
* [GEMM SDK example](https://github.com/NVIDIA/cutlass/tree/main/examples/27_ampere_3xtf32_fast_accurate_tensorop_gemm/27_ampere_3xtf32_fast_accurate_tensorop_gemm.cu) (real)
|
||||
* [COMPLEX GEMM SDK example](https://github.com/NVIDIA/cutlass/tree/main/examples/29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm/29_3xtf32_complex_gemm.cu) (complex)
|
||||
* [Implicit GEMM Convolution SDK example](https://github.com/NVIDIA/cutlass/tree/main/examples/28_ampere_3xtf32_fast_accurate_tensorop_fprop/ampere_3xtf32_fast_accurate_tensorop_fprop.cu)
|
||||
* **Mainloop fusion for Convolution:** convolution with fused per-channel scale-bias-relu
|
||||
* [Conv Fprop SDK example](https://github.com/NVIDIA/cutlass/tree/main/examples/25_ampere_fprop_mainloop_fusion/ampere_fprop_mainloop_fusion.cu)
|
||||
* [Conv WGrad SDK example](https://github.com/NVIDIA/cutlass/tree/main/examples/26_ampere_wgrad_mainloop_fusion/ampere_wgrad_mainloop_fusion.cu)
|
||||
* [cutlass::conv::device::ImplicitGemmConvolutionFusion](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/device/implicit_gemm_convolution_fusion.h)
|
||||
* **Grouped GEMM:** similar to batched GEMM with distinct problem size per group
|
||||
* [SDK example](https://github.com/NVIDIA/cutlass/tree/main/examples/24_gemm_grouped) with performance comparison with Batched Strided GEMM
|
||||
* [cutlass::gemm::device::GemmGrouped](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/device/gemm_grouped.h)
|
||||
* [Implicit GEMM Convolution fusion](https://github.com/NVIDIA/cutlass/tree/main/examples/13_two_tensor_op_fusion/) supports staging 1st convolution's output accumulator in the shared memory on Turing. This allows more flexible warp tile sizes and less register pressure.
|
||||
* Optimal performance using [**CUDA 11.5**](https://developer.nvidia.com/cuda-downloads)
|
||||
* Updates from the community (thanks!)
|
||||
|
||||
* **Deprecation announcement:** CUTLASS plans to deprecate the following:
|
||||
* Maxwell and Pascal GPU architectures
|
||||
* Ubuntu 16.04
|
||||
* CUDA 10.2
|
||||
|
||||
## [2.7.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.7.0) (2021-09-24)
|
||||
* Mainloop fusion for GEMM: [summation over A or B](https://github.com/NVIDIA/cutlass/tree/main/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu)
|
||||
* [Strided DGRAD (optimized iterators)](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/kernel/default_conv2d_dgrad.h)
|
||||
* [Half-precision GELU_taylor activation functions](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/thread/activation.h#L196)
|
||||
* Use these when accumulation and epilogue compute types are all `cutlass::half_t`
|
||||
* Tuning and bug fixes to [fused GEMM + GEMM example](https://github.com/NVIDIA/cutlass/tree/main/examples/13_two_tensor_op_fusion/)
|
||||
* Support for smaller than 128b aligned Convolutions: [see examples](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu#L272)
|
||||
* Caching of results to accelerate Convolution [unit tests](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/cache_testbed_output.h)
|
||||
* Can be enabled or disabled by running `cmake .. -DCUTLASS_TEST_ENABLE_CACHED_RESULTS=OFF`
|
||||
* Corrections and bug fixes reported by the CUTLASS community
|
||||
* Thank you for filing these issues!
|
||||
|
||||
## [2.6.1](https://github.com/NVIDIA/cutlass/releases/tag/v2.6.1) (2021-09-03)
|
||||
* Arbitrary padding and striding for CUTLASS Strided DGRAD Convolution operator (Analytic Iterators)
|
||||
* Tuning for GEMMs fused with partial reductions
|
||||
* Corrections and bug fixes reported by the CUTLASS community
|
||||
* Thank you for filing these issues!
|
||||
|
||||
## [2.6.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.6.0) (2021-07-22)
|
||||
* Optimal performance when compiled with the [CUDA 11.4 Toolkit](https://developer.nvidia.com/cuda-toolkit)
|
||||
* Adopt the new L2 prefetch feature in [cp.async](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/arch/memory.h) and [global load](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/arch/memory_sm80.h)
|
||||
* Fused operators with GEMM and Convolution
|
||||
* [Fused broadcast in epilogue](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_with_broadcast_f16n_f16n_f16n_tensorop_f32_sm75.cu)
|
||||
* [Fused partial reduction in epilogue](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_with_reduction_f16n_f16n_f16n_tensorop_f32_sm75.cu)
|
||||
* 64b tensor strides and leading dimensions support for GEMMs
|
||||
* Affine rank=2 matrix layouts
|
||||
* Row stride and column stride for matrices using [cutlass::layout::AffineRank2](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/layout/matrix.h)
|
||||
* Support [FP64 tensor core](https://github.com/NVIDIA/cutlass/tree/main/examples/18_ampere_fp64_tensorop_affine2_gemm/ampere_fp64_tensorop_affine2_gemm.cu) and SIMT GEMM.
|
||||
* [Batched GEMV](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemv.cu) preview implementation
|
||||
* [New strided Dgrad](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu) implementation
|
||||
* Accelerates over previous implementation by cutting down redundant math by 4x
|
||||
* Support using new `Dy` and `w` analytic iterators and existing `cutlass::conv::device::ImplicitGemmConvolution` interface
|
||||
* Quaternion-valued GEMM and Convolution in single- and double-precision (targeting CUDA Cores)
|
||||
* Updates to [quaternion.h](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/quaternion.h) and [functional.h](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/functional.h)
|
||||
* SDK Example for [GEMM](https://github.com/NVIDIA/cutlass/tree/main/examples/21_quaternion_gemm/quaternion_gemm.cu) and [Convolution](https://github.com/NVIDIA/cutlass/tree/main/examples/22_quaternion_conv/quaternion_conv.cu)
|
||||
* [Unit tests for GEMM](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/simt_qgemm_nn_sm50.cu) and [Convolution](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv2d_fprop_implicit_gemm_qf32nhwc_qf32nhwc_qf32nhwc_simt_f32_sm50.cu)
|
||||
* Many improvements to the epilogue.
|
||||
* Provide an [option](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/threadblock/epilogue.h) to not fully unroll the epilogue to reduce the code size and improve the performance when using complicated elementwise operations
|
||||
* Performance improvement for FP16 tensor core kernels
|
||||
* Bug fixes
|
||||
* Enhanced Clang support and the combination of Clang 13 and CUDA 11.4 can build and run kernels from Pascal and Ampere.
|
||||
* Updated minimum CUDA Toolkit requirement to 10.2
|
||||
* [CUDA 11.4 Toolkit](https://developer.nvidia.com/cuda-toolkit) recommended
|
||||
* Corrections and bug fixes reported by the CUTLASS community
|
||||
* Thank you for filing these issues!
|
||||
|
||||
## [2.5.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.5.0) (2021-02-26)
|
||||
* Tensor reductions
|
||||
* _m_-to-_n_ reductions of tensors with affine layout
|
||||
* [Specializations](https://github.com/NVIDIA/cutlass/tree/main/test/unit/reduction/device/tensor_reduce_contiguous.cu) for reductions including contiguous dimension
|
||||
* [Specializations](https://github.com/NVIDIA/cutlass/tree/main/test/unit/reduction/device/tensor_reduce_strided.cu) for reductions excluding contiguous dimension
|
||||
* Custom reduction functors such as `cutlass::logical_and`
|
||||
* Large tensor support, up to 2^63 elements (however, each dimension is limited to an extent of 2^31)
|
||||
* Optimizations for 3-D convolution
|
||||
* [Optimized tile iterators](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/conv/threadblock/conv3d_fprop_activation_tile_access_iterator_optimized.h) using precomputed delta table for 3-D convolution
|
||||
* Full coverage of [forward](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv3d_fprop_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm80.cu) and [backwards](https://github.com/NVIDIA/cutlass/tree/main/test/unit/conv/device/conv3d_dgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm80.cu) passes for 3D convolution
|
||||
* [Fused Convolution+Convolution example](https://github.com/NVIDIA/cutlass/tree/main/examples/13_two_tensor_op_fusion/README.md)
|
||||
* Corrections and bug fixes reported by the CUTLASS community
|
||||
* Thank you for filing these issues!
|
||||
|
||||
|
||||
## [2.4.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.4.0) (2020-11-19)
|
||||
* Implicit GEMM convolution kernels supporting CUDA and Tensor Cores on NVIDIA GPUs
|
||||
* Operators: forward (Fprop), backward data gradient (Dgrad), and backward weight gradient (Wgrad) convolution
|
||||
* Data type: FP32, `complex<FP32>`, Tensor Float 32 (TF32), BFloat16 (BF16), Float16, Int4, Int8, Int32
|
||||
* Spatial dimensions: 1-D, 2-D, and 3-D
|
||||
* Layout: NHWC, NCxHWx
|
||||
* Implicit GEMM convolution components:
|
||||
* Global memory iterators supporting Fprop, Dgrad, and Wgrad
|
||||
* `MmaMultistage` for implicit GEMM convolution for NVIDIA Ampere architecture
|
||||
* `MmaPipeline` for implicit GEMM convolution for NVIDIA Volta and Turing architectures
|
||||
* [Documentation](./media/docs/cpp/implicit_gemm_convolution.md) describing Implicit GEMM Convolution algorithm and implementation
|
||||
|
||||
## [2.3.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.3.0) (2020-09-23)
|
||||
* [NVIDIA Ampere Architecture features](https://devblogs.nvidia.com/nvidia-ampere-architecture-in-depth/)
|
||||
* [Sparse Tensor Core GEMM kernels](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/gemm_f16n_f16n_f32t_tensor_op_f32_sparse_sm80.cu):
|
||||
* Direct access to Sparse Tensor Cores and maximum performance via [`mma.sp.sync`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma-and-friends)
|
||||
* Fast SGEMM targeting GeForce RTX 30-series CUDA Cores
|
||||
* Minor Features:
|
||||
* [Activation functions](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/thread/activation.h) such as [GeLU](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/thread/linear_combination_gelu.h) and [Sigmoid](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/epilogue/thread/linear_combination_sigmoid.h)
|
||||
* Small [matrix](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/matrix.h) and [quaternion](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/quaternion.h) template classes in device code
|
||||
* [Floating-point constants](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/constants.h)
|
||||
* NVIDIA Ampere GPU Architecture examples and documentation:
|
||||
* [Tensor Float 32](https://github.com/NVIDIA/cutlass/tree/main/examples/14_ampere_tf32_tensorop_gemm/ampere_tf32_tensorop_gemm.cu) and
|
||||
* [Sparse Tensor Cores](https://github.com/NVIDIA/cutlass/tree/main/examples/15_ampere_sparse_tensorop_gemm/ampere_sparse_tensorop_gemm.cu)
|
||||
* Documentation added on CUTLASS [efficient row-major epilogue](./media/docs/cpp/gemm_api.md#efficient-epilogue)
|
||||
|
||||
## [2.2.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.2.0) (2020-06-08)
|
||||
* [NVIDIA Ampere Architecture features](https://devblogs.nvidia.com/nvidia-ampere-architecture-in-depth/)
|
||||
* Fast Tensor Core operations:
|
||||
* Maximum performance via [`mma.sync`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma-and-friends)
|
||||
* Tensor Float 32, BFloat16, and double-precision data types
|
||||
* Mixed integer data types (int8, int4, bin1)
|
||||
* Asynchronous copy for deep software pipelines via [`cp.async`](https://docs.nvidia.com/cuda/parallel-thread-execution)
|
||||
* Described in [GTC 2020 Webinar (SR 21745)](https://developer.nvidia.com/gtc/2020/video/s21745) (free registration required)
|
||||
* Features:
|
||||
* SDK examples showing GEMM fused with bias+relu and fused GEMM+GEMM
|
||||
* Complex-valued GEMMs targeting NVIDIA Ampere Tensor Cores in double-precision and Tensor Float 32
|
||||
* Gaussian complex GEMMs using 3m complex multiply algorithm
|
||||
* Universal GEMM kernel supporting two batch modes and two algorithms for parallel reductions
|
||||
* Policy updates:
|
||||
* [CUDA 11 Toolkit](https://developer.nvidia.com/cuda-toolkit) needed to enable NVIDIA Ampere Architecture features
|
||||
* Disabled F16C by default for compatibility - enable on cmake command line with `-DCUTLASS_ENABLE_F16C=ON`
|
||||
|
||||
## [2.1.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.1.0) (2020-04-06)
|
||||
* BLAS-style host-side API added to [CUTLASS Library](./media/docs/cpp/quickstart.md#cutlass-library)
|
||||
* API to launch compiled kernel instances for GEMM and planar complex GEMM
|
||||
* Planar Complex GEMM kernels targeting Volta and Turing Tensor Cores
|
||||
* Computes complex matrix products on matrices stored as disjoint real and imaginary parts
|
||||
* [SDK Examples of Planar Complex GEMMs](https://github.com/NVIDIA/cutlass/tree/main/examples/10_planar_complex/planar_complex.cu)
|
||||
* Minor enhancements and bug fixes
|
||||
|
||||
## [2.0.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.0.0) (2019-11-19)
|
||||
* Substantially refactored for
|
||||
* Better performance, particularly for native Turing Tensor Cores
|
||||
* Robust and durable templates spanning the design space
|
||||
* Encapsulated functionality embodying modern C++11 programming techniques
|
||||
* Optimized containers and data types for efficient, generic, portable device code
|
||||
* Updates to:
|
||||
* [Quick start guide](./media/docs/cpp/quickstart.md)
|
||||
* [Documentation](./README.md#documentation)
|
||||
* [Utilities](./media/docs/cpp/utilities.md)
|
||||
* [CUTLASS Profiler](./media/docs/cpp/profiler.md)
|
||||
* Native Turing Tensor Cores
|
||||
* Efficient GEMM kernels targeting Turing Tensor Cores
|
||||
* Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands
|
||||
* Coverage of existing CUTLASS functionality
|
||||
* GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
|
||||
* Volta Tensor Cores through native mma.sync and through WMMA API
|
||||
* Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
|
||||
* Batched GEMM operations
|
||||
* Complex-valued GEMMs
|
||||
* **Note: a host compiler supporting C++11 or greater is required.**
|
||||
|
||||
# CUTLASS 1.x
|
||||
|
||||
## [1.3.2](https://github.com/NVIDIA/cutlass/releases/tag/v1.3.2) (2019-07-09)
|
||||
* Performance improvement for Volta Tensor Cores TN and TT layouts.
|
||||
|
||||
## [1.3.1](https://github.com/NVIDIA/cutlass/releases/tag/v1.3.1) (2019-04-09)
|
||||
* Corrected NVRTC unit tests.
|
||||
|
||||
## [1.3.0](https://github.com/NVIDIA/cutlass/releases/tag/v1.3.0) (2019-03-20)
|
||||
* Efficient GEMM kernel targeting Volta Tensor Cores via `mma.sync` instruction added in CUDA 10.1.
|
||||
|
||||
## [1.2.0](https://github.com/NVIDIA/cutlass/releases/tag/v1.2.0) (2018-10-26)
|
||||
* Parallelized reductions across threadblocks ("Split-K")
|
||||
* Improved IGEMM performance
|
||||
* Batched strided WMMA GEMMs
|
||||
|
||||
## [1.1.0](https://github.com/NVIDIA/cutlass/releases/tag/v1.1.0) (2018-09-19)
|
||||
* Turing Features
|
||||
* WMMA GEMM targeting TensorCores - INT8, INT4, 1-bit
|
||||
* Batched Strided GEMM
|
||||
* Threadblock rasterization strategies
|
||||
* Improved performance for adverse problem sizes and data layouts
|
||||
* Extended CUTLASS Core components
|
||||
* Tensor views support arbitrary matrix and tensor layouts
|
||||
* Zip iterators for structuring multiple data streams
|
||||
* Enhanced CUTLASS utilities
|
||||
* Reference code for tensor operations in host and device code
|
||||
* Added HostMatrix<> for simplified matrix creation
|
||||
* Examples
|
||||
* Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM
|
||||
|
||||
## [1.0.1](https://github.com/NVIDIA/cutlass/releases/tag/v1.0.1) (2018-06-11)
|
||||
|
||||
* Intra-threadblock reduction added for small threadblock tile sizes
|
||||
* sgemm_64x128x16, sgemm_128x128x16, sgemm_128x64x16, sgemm_128x32x16, sgemm_64x64x16, sgemm_64x32x16
|
||||
* igemm_32x32x128
|
||||
* GEMM _K_ residue handled during prologue prior to mainloop
|
||||
* Replaced Google Test copy with submodule. Use `git submodule update --init --recursive`
|
||||
|
||||
## [1.0.0](https://github.com/NVIDIA/cutlass/commit/2028ebe120aab22bfd0b2baf8902d4c9627eb33f) (2018-05-16)
|
||||
|
||||
* Substantial rewrite to accommodate new architecture
|
||||
* Kernels: SGEMM, DGEMM, IGEMM, HGEMM, WMMA GEMM
|
||||
* Unit and performance tests
|
||||
|
||||
## [0.0.1](https://github.com/NVIDIA/cutlass/commit/d08ba8ac46e2fa3f745e070c390182edb56b2e91) (2017-12-04)
|
||||
|
||||
* Initial release
|
||||
|
||||
|
||||
## Copyright
|
||||
|
||||
Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
```
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
```
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
cff-version: 1.2.0
|
||||
title: CUTLASS
|
||||
message: >-
|
||||
If you use this software, please cite using the
|
||||
following metadata.
|
||||
type: software
|
||||
authors:
|
||||
- given-names: Vijay
|
||||
family-names: Thakkar
|
||||
email: vithakkar@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Pradeep
|
||||
family-names: Ramani
|
||||
email: prramani@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Cris
|
||||
family-names: Cecka
|
||||
email: ccecka@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Aniket
|
||||
family-names: Shivam
|
||||
email: ashivam@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Honghao
|
||||
family-names: Lu
|
||||
email: honghaol@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Ethan
|
||||
family-names: Yan
|
||||
email: etyan@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Jack
|
||||
family-names: Kosaian
|
||||
email: jkosaian@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Mark
|
||||
family-names: Hoemmen
|
||||
email: mhoemmen@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Haicheng
|
||||
family-names: Wu
|
||||
email: haichengw@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Andrew
|
||||
family-names: Kerr
|
||||
email: akerr@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Matt
|
||||
family-names: Nicely
|
||||
email: mnicely@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Duane
|
||||
family-names: Merrill
|
||||
email: dumerrill@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Dustyn
|
||||
family-names: Blasig
|
||||
email: dblasig@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Fengqi
|
||||
family-names: Qiao
|
||||
email: fqiao@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Piotr
|
||||
family-names: Majcher
|
||||
email: pmajcher@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Paul
|
||||
family-names: Springer
|
||||
email: pspringer@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Markus
|
||||
family-names: Hohnerbach
|
||||
affiliation: NVIDIA
|
||||
email: mhohnerbach@nvidia.com
|
||||
- given-names: Jin
|
||||
family-names: Wang
|
||||
email: jinw@nvidia.com
|
||||
affiliation: NVIDIA
|
||||
- given-names: Manish
|
||||
family-names: Gupta
|
||||
affiliation: Google
|
||||
email: manigupta@google.com
|
||||
|
||||
|
||||
repository-code: 'https://github.com/NVIDIA/cutlass'
|
||||
abstract: >-
|
||||
CUTLASS is a collection of CUDA C++ template
|
||||
abstractions for implementing high-performance
|
||||
matrix-multiplication (GEMM) and related
|
||||
computations at all levels and scales within CUDA.
|
||||
It incorporates strategies for hierarchical
|
||||
decomposition and data movement similar to those
|
||||
used to implement cuBLAS and cuDNN. CUTLASS
|
||||
decomposes these "moving parts" into reusable,
|
||||
modular software components abstracted by C++
|
||||
template classes. These thread-wide, warp-wide,
|
||||
block-wide, and device-wide primitives can be
|
||||
specialized and tuned via custom tiling sizes, data
|
||||
types, and other algorithmic policy. The resulting
|
||||
flexibility simplifies their use as building blocks
|
||||
within custom kernels and applications.
|
||||
keywords:
|
||||
- 'cutlass, tensor cores, cuda, cute, nvidia, gpu, linear algebra, matrix computations'
|
||||
license: BSD-3-Clause
|
||||
license-url: https://github.com/NVIDIA/cutlass/blob/v3.0.0/LICENSE.txt
|
||||
version: '3.0.0'
|
||||
date-released: '2023-01-23'
|
||||
identifiers:
|
||||
- type: url
|
||||
value: "https://github.com/NVIDIA/cutlass/tree/v3.0.0"
|
||||
description: The GitHub release URL of tag 3.0.0
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,203 @@
|
|||

|
||||
|
||||
[README](./README.md#documentation) > **Contributors**
|
||||
|
||||
# CUTLASS C++ Developers **
|
||||
|
||||
Andrew Kerr<br />
|
||||
Paul Springer<br />
|
||||
Dustyn Blasig<br />
|
||||
Albert Xu<br />
|
||||
Junkai Wu<br />
|
||||
Xiuxia Zhang<br />
|
||||
Haicheng Wu<br />
|
||||
Jack Yang<br />
|
||||
Pradeep Ramani<br />
|
||||
Aditya Atluri<br />
|
||||
Han Li<br />
|
||||
Nick Zhao<br />
|
||||
Ivan Yin<br />
|
||||
Yu-Jung Chen<br />
|
||||
Markus Hoehnerbach<br />
|
||||
Honghao Lu<br />
|
||||
Mihir Awatramani<br />
|
||||
Hao Sheng<br />
|
||||
Zekun Fan<br />
|
||||
Aniket Shivam<br />
|
||||
Siyu Liu<br />
|
||||
Richard Cai<br />
|
||||
Vikas Gupta<br />
|
||||
Ethan Yan<br />
|
||||
Vijay Thakkar<br />
|
||||
Cris Cecka<br />
|
||||
Lawrence Ryan<br />
|
||||
Qun Song<br />
|
||||
Daniel Ricketts<br />
|
||||
dePaul Miller<br />
|
||||
Yuhan Li<br />
|
||||
Saman Ashkiani<br />
|
||||
Jack Chen<br />
|
||||
Shang Zhang<br />
|
||||
Petrick Liu<br />
|
||||
Questa Wang<br />
|
||||
Pramod Shenoy<br />
|
||||
Jack Kosaian<br />
|
||||
Yujia Zhai<br />
|
||||
Zhaodong Chen<br />
|
||||
Manas Sahni<br />
|
||||
Shunfan Shao<br />
|
||||
Fengqi Qiao<br />
|
||||
Serif Yesil<br />
|
||||
Aragorn Guan<br />
|
||||
Heidi He<br />
|
||||
Xiao Song<br />
|
||||
Sergey Klevtsov<br />
|
||||
Jiang Shao<br />
|
||||
Ruqing Xu<br />
|
||||
Mengyu Guo<br />
|
||||
Tao Xie<br />
|
||||
Linfeng Zheng<br />
|
||||
Harrison Barclay<br />
|
||||
Wenfei Tang<br />
|
||||
Diksha Gohlyan<br />
|
||||
Alexander Zhurkevich<br />
|
||||
Siyuan Fu<br />
|
||||
Hua Huang<br />
|
||||
Xiufan Liang<br />
|
||||
Ian Tramble<br />
|
||||
Ali Hassani<br />
|
||||
Shreya Gaur<br />
|
||||
|
||||
** _The list is sorted in order of each author's first contribution to the CUTLASS project._
|
||||
|
||||
# CUTLASS DSL Developers ***
|
||||
|
||||
Albert Di<br />
|
||||
Albert Xu<br />
|
||||
Anakin Zheng<br />
|
||||
Arvin Jou<br />
|
||||
Brandon Sun<br />
|
||||
Chenyang Xu<br />
|
||||
Chunyu Wang<br />
|
||||
Cris Cecka<br />
|
||||
dePaul Miller<br />
|
||||
Edward Cao<br />
|
||||
Fung Xie<br />
|
||||
Guray Ozen<br />
|
||||
Hao Hu<br />
|
||||
Hong Wang<br />
|
||||
Jeremy Furtek<br />
|
||||
Jie Fang<br />
|
||||
JingZe Cui<br />
|
||||
Kihiro Bando<br />
|
||||
Linfeng Zheng<br />
|
||||
Longsheng Du<br />
|
||||
Mina Sun<br />
|
||||
Mindy Li<br />
|
||||
Pradeep Ramani<br />
|
||||
Questa Wang<br />
|
||||
Serif Yesil<br />
|
||||
Tao Xie<br />
|
||||
Tina Li<br />
|
||||
Vicki Wang<br />
|
||||
Vincent Zhang<br />
|
||||
Vijay Thakkar<br />
|
||||
Xiao Dong<br />
|
||||
Xiaolei Shi<br />
|
||||
Xinyu Wang<br />
|
||||
Yihan Chen<br />
|
||||
Yuhan Li<br />
|
||||
Zekun Fan<br />
|
||||
|
||||
*** _Sorted in alphabetical order._
|
||||
|
||||
|
||||
# CuTe Developers
|
||||
|
||||
Cris Cecka<br />
|
||||
Vijay Thakkar<br />
|
||||
|
||||
|
||||
# CUTLASS Product Manager
|
||||
|
||||
Matthew Nicely<br />
|
||||
|
||||
|
||||
# Former CUTLASS Developers
|
||||
|
||||
Manish Gupta<br />
|
||||
Duane Merrill<br />
|
||||
Piotr Majcher<br />
|
||||
Naila Farooqui<br />
|
||||
Mark Hoemmen<br />
|
||||
Rawn Henry<br />
|
||||
Jin Wang<br />
|
||||
Timmy Liu<br />
|
||||
Manikandan Ananth<br />
|
||||
David Tanner<br />
|
||||
|
||||
|
||||
# Acknowledgements
|
||||
|
||||
Tri Dao<br />
|
||||
Jay Shah<br />
|
||||
Mehdi Amini<br />
|
||||
Larry Wu<br />
|
||||
Justin Holewinski<br />
|
||||
Timothy Costa<br />
|
||||
Julien Demouth<br />
|
||||
Brian Fahs<br />
|
||||
Michael Garland<br />
|
||||
Michael Goldfarb<br />
|
||||
Mostafa Hagog<br />
|
||||
Fei Hu<br />
|
||||
Alan Kaatz<br />
|
||||
Wei Liu<br />
|
||||
Tim Martin<br />
|
||||
Kevin Siu<br />
|
||||
Markus Tavenrath<br />
|
||||
John Tran<br />
|
||||
Yang Xu<br />
|
||||
Scott Yokim<br />
|
||||
Girish Bharambe<br />
|
||||
Luke Durant<br />
|
||||
Carter Edwards<br />
|
||||
Olivier Giroux<br />
|
||||
Stephen Jones<br />
|
||||
Rishkul Kulkarni<br />
|
||||
Bryce Lelbach<br />
|
||||
Joel McCormack<br />
|
||||
Kyrylo Perelygin<br />
|
||||
Sean Treichler<br />
|
||||
|
||||
# Copyright
|
||||
|
||||
Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
```
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
```
|
||||
|
|
@ -0,0 +1,369 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Legacy CUDA_COMPILER option handling.
# A value matching "clang" is still honoured (with a deprecation warning) by
# recording that Clang device compilation was requested; any other non-false
# value is reported and ignored.
if(CUDA_COMPILER MATCHES "[Cc]lang")
  set(__CLANG_DEVICE_COMPILATION_REQUESTED ON)
  message(WARNING "CUDA_COMPILER flag is deprecated, set CMAKE_CUDA_COMPILER to desired compiler executable.")
elseif(CUDA_COMPILER)
  message(WARNING "Deprecated flag CUDA_COMPILER used with unknown argument ${CUDA_COMPILER}, ignoring.")
endif()

# When Clang was requested but no CUDA compiler was chosen explicitly, default to
# `clang++` and let CMake locate it on the system (or fail if it cannot).
if(NOT DEFINED CMAKE_CUDA_COMPILER AND __CLANG_DEVICE_COMPILATION_REQUESTED)
  set(CMAKE_CUDA_COMPILER clang++)
endif()
|
||||
|
||||
# Turn on CUDA as a project language and locate the toolkit it belongs to.
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)

# Older CMake code expects the CUDA_VERSION* variables; mirror the values
# reported by the CUDAToolkit package unless CUDA_VERSION is already set.
if(NOT CUDA_VERSION)
  set(CUDA_VERSION_MAJOR ${CUDAToolkit_VERSION_MAJOR})
  set(CUDA_VERSION_MINOR ${CUDAToolkit_VERSION_MINOR})
  set(CUDA_VERSION ${CUDAToolkit_VERSION})
endif()

# The toolkit root may be unset in some scenarios (e.g. Clang device
# compilation). In that case derive it from the nvcc executable found by the
# CUDAToolkit package: two directory levels above the nvcc path.
if(NOT CUDA_TOOLKIT_ROOT_DIR)
  get_filename_component(
    CUDA_TOOLKIT_ROOT_DIR
    "${CUDAToolkit_NVCC_EXECUTABLE}/../.."
    ABSOLUTE
  )
endif()
|
||||
|
||||
# Record which toolchain performs device compilation, based on the compiler ID
# CMake detected for the CUDA language. Anything other than nvcc/NVIDIA or Clang
# is a hard configuration error.
if(CMAKE_CUDA_COMPILER_ID MATCHES "(nvcc|[Nn][Vv][Ii][Dd][Ii][Aa])")
  set(CUTLASS_NVCC_DEVICE_COMPILE ON CACHE BOOL "Using nvcc tools for device compilation")
elseif(CMAKE_CUDA_COMPILER_ID MATCHES "[Cc]lang")
  set(CUTLASS_CLANG_DEVICE_COMPILE ON CACHE BOOL "Using Clang tools for device compilation")
else()
  message(FATAL_ERROR "Unknown device-side compiler ${CMAKE_CUDA_COMPILER_ID} found. Set CMAKE_CUDA_COMPILER to either nvcc or clang++.")
endif()

# Clang device compilation needs CMake 3.30 or newer.
# VERSION_LESS is used (not VERSION_LESS_EQUAL): omitted version components
# compare as zero, so "3.30.0 <= 3.30" is true and the previous check rejected
# CMake 3.30.0 even though the message states that 3.30 is sufficient.
if(CUTLASS_CLANG_DEVICE_COMPILE AND CMAKE_VERSION VERSION_LESS "3.30")
  message(FATAL_ERROR "Clang device compilation for CUTLASS requires CMake 3.30 or higher.")
endif()

# Minimum supported CUDA toolkit version.
if(CUDA_VERSION VERSION_LESS 9.2)
  message(FATAL_ERROR "CUDA 9.2+ required, found ${CUDA_VERSION}.")
endif()
|
||||
|
||||
# Locate the CUDA runtime library. NO_DEFAULT_PATH keeps the search inside the
# CUDA toolkit we are building against; no system paths are consulted.
find_library(
  CUDART_LIBRARY cudart
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES lib/x86_64-linux-gnu lib/x64 lib64 lib
  NO_DEFAULT_PATH
)

# Expose the runtime as the imported target `cudart` (alias `nvidia::cudart`)
# unless a target of that name already exists.
if(TARGET cudart)

  message(STATUS "CUDART: Already Found")

elseif(CUDART_LIBRARY)

  message(STATUS "CUDART: ${CUDART_LIBRARY}")

  if(WIN32)
    # On Windows the link step uses the .lib import library found under lib/x64,
    # so the target is declared STATIC; the .dll itself is loaded at runtime
    # through the PATH search.
    add_library(cudart STATIC IMPORTED GLOBAL)
  else()
    add_library(cudart SHARED IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cudart ALIAS cudart)

  set_target_properties(cudart PROPERTIES IMPORTED_LOCATION "${CUDART_LIBRARY}")

else()

  message(STATUS "CUDART: Not Found")

endif()
|
||||
|
||||
# Locate the CUDA driver library (`cuda`), including the toolkit's stubs
# directories. NO_DEFAULT_PATH keeps the search inside the CUDA toolkit we are
# building against; no system paths are consulted.
find_library(
  CUDA_DRIVER_LIBRARY cuda
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES lib/x86_64-linux-gnu lib/x64 lib64 lib lib64/stubs lib/stubs
  NO_DEFAULT_PATH
)

# Expose the driver as the imported target `cuda_driver` (alias
# `nvidia::cuda_driver`) unless a target of that name already exists.
if(TARGET cuda_driver)

  message(STATUS "CUDA Driver: Already Found")

elseif(CUDA_DRIVER_LIBRARY)

  message(STATUS "CUDA Driver: ${CUDA_DRIVER_LIBRARY}")

  if(WIN32)
    # On Windows the link step uses the .lib import library found under lib/x64,
    # so the target is declared STATIC; the .dll itself is loaded at runtime
    # through the PATH search.
    add_library(cuda_driver STATIC IMPORTED GLOBAL)
  else()
    add_library(cuda_driver SHARED IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cuda_driver ALIAS cuda_driver)

  set_target_properties(cuda_driver PROPERTIES IMPORTED_LOCATION "${CUDA_DRIVER_LIBRARY}")

else()

  message(STATUS "CUDA Driver: Not Found")

endif()
|
||||
|
||||
# Locate the NVRTC library. NO_DEFAULT_PATH keeps the search inside the CUDA
# toolkit we are building against; no system paths are consulted.
find_library(
  NVRTC_LIBRARY nvrtc
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES lib/x64 lib64 lib
  NO_DEFAULT_PATH
)

# Expose NVRTC as the imported target `nvrtc` (alias `nvidia::nvrtc`) unless a
# target of that name already exists.
if(TARGET nvrtc)

  message(STATUS "NVRTC: Already Found")

elseif(NVRTC_LIBRARY)

  message(STATUS "NVRTC: ${NVRTC_LIBRARY}")

  if(WIN32)
    # On Windows the link step uses the .lib import library found under lib/x64,
    # so the target is declared STATIC; the .dll itself is loaded at runtime
    # through the PATH search.
    add_library(nvrtc STATIC IMPORTED GLOBAL)
  else()
    add_library(nvrtc SHARED IMPORTED GLOBAL)
  endif()

  add_library(nvidia::nvrtc ALIAS nvrtc)

  set_target_properties(nvrtc PROPERTIES IMPORTED_LOCATION "${NVRTC_LIBRARY}")

else()

  message(STATUS "NVRTC: Not Found")

endif()
|
||||
|
||||
# Some platforms (e.g. Visual Studio) don't add the CUDA include directories to
# the system include paths by default, so they are added explicitly here.
# NOTE(review): CUDA_INCLUDE_DIRS is not assigned anywhere in the visible part of
# this file - confirm it is provided elsewhere, otherwise this call adds nothing.
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
|
||||
|
||||
# Unity ("combined source") builds default to ON for MSVC or when
# CUTLASS_LIBRARY_KERNELS matches "all", and to OFF otherwise. The cache entry
# lets the user override that default.
set(CUTLASS_UNITY_BUILD_ENABLED_INIT OFF)
if(MSVC OR CUTLASS_LIBRARY_KERNELS MATCHES "all")
  set(CUTLASS_UNITY_BUILD_ENABLED_INIT ON)
endif()

set(CUTLASS_UNITY_BUILD_ENABLED ${CUTLASS_UNITY_BUILD_ENABLED_INIT} CACHE BOOL "Enable combined source compilation")

# Default number of .cu files combined into one unity file: 8 for MSVC,
# 16 for every other compiler.
set(CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT 16)
if(MSVC)
  set(CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT 8)
endif()

set(CUTLASS_UNITY_BUILD_BATCH_SIZE ${CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT} CACHE STRING "Batch size for unified source files")
|
||||
|
||||
# cutlass_unify_source_files(<out-var> [BATCH_SOURCES <bool>] [BATCH_SIZE <n>] <args>...)
#
# Produces the source-argument list for a target, optionally batching CUDA
# sources into generated "unity" files.
#
#   <out-var>      Name of the variable, set in the caller's scope, that receives
#                  the resulting argument list.
#   BATCH_SOURCES  Whether to batch at all (defaults to ON when omitted).
#   BATCH_SIZE     Number of .cu files per unity file (defaults to
#                  CUTLASS_UNITY_BUILD_BATCH_SIZE when omitted).
#   <args>...      Remaining arguments; those ending in ".cu" are batched, all
#                  others are passed through unchanged and placed first.
#
# Batching only happens when CUTLASS_UNITY_BUILD_ENABLED is true, BATCH_SOURCES
# is true and BATCH_SIZE is greater than 1; otherwise the arguments are returned
# as given. Each unity file is written to CMAKE_CURRENT_BINARY_DIR at configure
# time and #includes its batch of sources by absolute path.
function(cutlass_unify_source_files TARGET_ARGS_VAR)

  set(options)
  set(oneValueArgs BATCH_SOURCES BATCH_SIZE)
  set(multiValueArgs)
  # The "_" prefix yields variables named __BATCH_SOURCES, __BATCH_SIZE and
  # __UNPARSED_ARGUMENTS.
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if(NOT DEFINED TARGET_ARGS_VAR)
    message(FATAL_ERROR "TARGET_ARGS_VAR parameter is required")
  endif()

  if(NOT DEFINED __BATCH_SOURCES)
    set(__BATCH_SOURCES ON)
  endif()

  if(__BATCH_SOURCES AND NOT DEFINED __BATCH_SIZE)
    set(__BATCH_SIZE ${CUTLASS_UNITY_BUILD_BATCH_SIZE})
  endif()

  if(CUTLASS_UNITY_BUILD_ENABLED AND __BATCH_SOURCES AND __BATCH_SIZE GREATER 1)

    set(CUDA_FILE_ARGS)
    set(TARGET_SOURCE_ARGS)

    # Split the arguments into CUDA sources and everything else.
    foreach(ARG ${__UNPARSED_ARGUMENTS})
      # "\\." is a literal dot in the regex. The previous "\." was reduced by
      # CMake's string escaping to a bare ".", which matches any character, so
      # names merely ending in "<char>cu" were treated as CUDA sources.
      # ARG is passed by name so if() reads its value exactly once; an unquoted
      # ${ARG} could be dereferenced again if the value named another variable.
      if(ARG MATCHES "\\.cu$")
        list(APPEND CUDA_FILE_ARGS ${ARG})
      else()
        list(APPEND TARGET_SOURCE_ARGS ${ARG})
      endif()
    endforeach()

    # Consume CUDA_FILE_ARGS in slices of at most __BATCH_SIZE entries.
    list(LENGTH CUDA_FILE_ARGS NUM_CUDA_FILE_ARGS)
    while(NUM_CUDA_FILE_ARGS GREATER 0)
      list(SUBLIST CUDA_FILE_ARGS 0 ${__BATCH_SIZE} CUDA_FILE_BATCH)

      # Name the unity file after a 12-character prefix of the SHA256 of the
      # batch's file list, so the name is determined by the batch contents.
      string(SHA256 CUDA_FILE_BATCH_HASH "${CUDA_FILE_BATCH}")
      string(SUBSTRING ${CUDA_FILE_BATCH_HASH} 0 12 CUDA_FILE_BATCH_HASH)

      # NAME is not a parameter of this function; it is read from the calling
      # function's scope (the callers in this file each take a NAME parameter).
      set(BATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.unity.${CUDA_FILE_BATCH_HASH}.cu)
      message(STATUS "Generating ${BATCH_FILE}")

      file(WRITE ${BATCH_FILE} "// Unity File - Auto Generated!\n")
      foreach(CUDA_FILE ${CUDA_FILE_BATCH})
        get_filename_component(CUDA_FILE_ABS_PATH ${CUDA_FILE} ABSOLUTE)
        file(APPEND ${BATCH_FILE} "#include \"${CUDA_FILE_ABS_PATH}\"\n")
      endforeach()

      list(APPEND TARGET_SOURCE_ARGS ${BATCH_FILE})

      # The slice just written was the last one: stop before slicing past the end.
      if(NUM_CUDA_FILE_ARGS LESS_EQUAL __BATCH_SIZE)
        break()
      endif()

      # Drop the files just batched and continue with the remainder.
      list(SUBLIST CUDA_FILE_ARGS ${__BATCH_SIZE} -1 CUDA_FILE_ARGS)
      list(LENGTH CUDA_FILE_ARGS NUM_CUDA_FILE_ARGS)
    endwhile()

  else()

    set(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  endif()

  set(${TARGET_ARGS_VAR} ${TARGET_SOURCE_ARGS} PARENT_SCOPE)

endfunction()
|
||||
# cutlass_add_library(<name> [SKIP_GENCODE_FLAGS] [EXPORT_NAME <export-name>] <args>...)
#
# Creates library target <name> from <args> after passing them through
# cutlass_unify_source_files, then applies the standard CUTLASS compile options.
#
#   SKIP_GENCODE_FLAGS  Do not call cutlass_apply_cuda_gencode_flags on the target.
#   EXPORT_NAME         When given, also creates the alias
#                       nvidia::cutlass::<export-name> and sets the target's
#                       EXPORT_NAME property.
function(cutlass_add_library NAME)

  cmake_parse_arguments(_ "SKIP_GENCODE_FLAGS" "EXPORT_NAME" "" ${ARGN})

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  # The trailing empty string is passed to add_library exactly as before.
  add_library(${NAME} ${TARGET_SOURCE_ARGS} "")

  cutlass_apply_standard_compile_options(${NAME})

  if(NOT __SKIP_GENCODE_FLAGS)
    cutlass_apply_cuda_gencode_flags(${NAME})
  endif()

  target_compile_features(${NAME} INTERFACE cxx_std_11)

  # Choose the CUDA runtime flavour that matches the kind of library built;
  # other target types keep CMake's default.
  get_target_property(TARGET_TYPE ${NAME} TYPE)
  if(TARGET_TYPE MATCHES "SHARED")
    set_property(TARGET ${NAME} PROPERTY CUDA_RUNTIME_LIBRARY Shared)
  elseif(TARGET_TYPE MATCHES "STATIC")
    set_property(TARGET ${NAME} PROPERTY CUDA_RUNTIME_LIBRARY Static)
  endif()

  if(__EXPORT_NAME)
    add_library(nvidia::cutlass::${__EXPORT_NAME} ALIAS ${NAME})
    set_property(TARGET ${NAME} PROPERTY EXPORT_NAME ${__EXPORT_NAME})
  endif()

endfunction()
|
||||
|
||||
# cutlass_add_executable(<name> [CUDA_RUNTIME_LIBRARY <None|Shared|Static>] <args>...)
#
# Creates executable target <name> from <args> after passing them through
# cutlass_unify_source_files, then applies the standard CUTLASS compile options
# and CUDA gencode flags.
#
#   CUDA_RUNTIME_LIBRARY  Value for the target property of the same name;
#                         defaults to Shared. Any value other than None, Shared
#                         or Static is a fatal error.
function(cutlass_add_executable NAME)

  cmake_parse_arguments(_ "" "CUDA_RUNTIME_LIBRARY" "" ${ARGN})

  if(NOT DEFINED __CUDA_RUNTIME_LIBRARY)
    set(__CUDA_RUNTIME_LIBRARY Shared)
  endif()

  # Validate the requested runtime before any target is created.
  set(__CUDA_RUNTIME_LIBRARY_ALLOWED None Shared Static)
  if(NOT __CUDA_RUNTIME_LIBRARY IN_LIST __CUDA_RUNTIME_LIBRARY_ALLOWED)
    message(FATAL_ERROR "CUDA_RUNTIME_LIBRARY value '${__CUDA_RUNTIME_LIBRARY}' is not in allowed list of '${__CUDA_RUNTIME_LIBRARY_ALLOWED}'")
  endif()

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  add_executable(${NAME} ${TARGET_SOURCE_ARGS})

  cutlass_apply_standard_compile_options(${NAME})
  cutlass_apply_cuda_gencode_flags(${NAME})

  target_compile_features(${NAME} INTERFACE cxx_std_11)

  set_property(TARGET ${NAME} PROPERTY CUDA_RUNTIME_LIBRARY ${__CUDA_RUNTIME_LIBRARY})

endfunction()
|
||||
|
||||
# cutlass_target_sources(<name> <args>...)
#
# Wrapper around target_sources: <args> are first passed through
# cutlass_unify_source_files and the resulting list is added to target <name>.
# No keywords are recognised here, so every argument after <name> is forwarded.
function(cutlass_target_sources NAME)

  cmake_parse_arguments(_ "" "" "" ${ARGN})

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  target_sources(${NAME} ${TARGET_SOURCE_ARGS})

endfunction()
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,188 @@
|
|||
NVIDIA Software License Agreement
|
||||
|
||||
IMPORTANT NOTICE – PLEASE READ AND AGREE BEFORE USING THE SOFTWARE
|
||||
This software license agreement (“Agreement”) is a legal agreement between you, whether an individual or entity, (“you”) and NVIDIA Corporation (“NVIDIA”) and governs the use of the NVIDIA CUTLASS DSLs software and materials that NVIDIA delivers to you under this Agreement (“Software”).
|
||||
NVIDIA and you are each a “party” and collectively the “parties.”
|
||||
This Agreement can be accepted only by an adult of legal age of majority in the country in which the Software is used.
|
||||
If you don’t have the required age or authority to accept this Agreement, or if you don’t accept all the terms and conditions of this Agreement, do not use the Software.
|
||||
|
||||
1. License Grants
|
||||
|
||||
1.1. License Grant to You. The Software made available by NVIDIA to you is licensed, not sold.
|
||||
Subject to the terms of this Agreement, NVIDIA grants you a limited, non-exclusive, revocable, non-transferable, and non-sublicensable (except as expressly granted in this Agreement), license to:
|
||||
|
||||
a. install and use copies of the Software,
|
||||
b. configure the Software using configuration files provided (if applicable),
|
||||
c. modify and create derivative works of any sample or example source code NVIDIA delivers to you as part of the Software (“Derivatives”) (if applicable), and
|
||||
d. distribute python files in the Software package in source format as incorporated into a software application subject to the following distribution requirements:
|
||||
|
||||
i. Your application must have material additional functionality, beyond the included portions of the Software.
|
||||
ii. The distributable portions of the Software shall only be accessed by your application.
|
||||
iii. The following notice shall be included in modifications and derivative works of sample source code distributed: “This software contains source code provided by NVIDIA Corporation.”
|
||||
iv. Unless a developer tool is identified in this Agreement as distributable, it is delivered for your internal use only.
|
||||
v. The terms under which you distribute your application must be consistent with the terms of this Agreement, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights.
|
||||
vi. Additionally, you agree that you will protect the privacy, security and legal rights of your application users.
|
||||
|
||||
The foregoing (a) through (d) are, collectively, the “Purpose”, and the developed applications are only for use in systems with NVIDIA GPUs.
|
||||
|
||||
1.2. License Grant to NVIDIA. Subject to the terms of this Agreement, you grant NVIDIA and its affiliates a non-exclusive, perpetual, irrevocable, sublicensable, worldwide, royalty-free, fully paid-up and transferable license, under your intellectual property rights, to publicly perform, publicly display, reproduce, use, make, have made, sell, offer for sale, distribute (through multiple tiers of distribution), import, create derivative works of and otherwise commercialize and exploit at NVIDIA’s discretion any Derivatives created by or for you.
|
||||
You may, but are not required to, deliver any Derivatives to NVIDIA.
|
||||
|
||||
2. License Restrictions
|
||||
|
||||
Your license to use the Software and Derivatives is restricted as stated in this Section 2 (“License Restrictions”).
|
||||
You will cooperate with NVIDIA and, upon NVIDIA’s written request, you will confirm in writing and provide reasonably requested information to verify your compliance with the terms of this Agreement.
|
||||
You may not:
|
||||
|
||||
2.1. Use the Software or Derivatives for any purpose other than the Purpose;
|
||||
|
||||
2.2. Sell, rent, sublicense, transfer, distribute or otherwise make available to others (except authorized users as stated in Section 3 (“Authorized Users”)) any portion of the Software or Derivatives, except as expressly granted in Section 1.1 (“License Grant to You”);
|
||||
|
||||
2.3. Reverse engineer, decompile, or disassemble the Software components provided in binary form, nor attempt in any other manner to obtain source code of such Software;
|
||||
|
||||
2.4. Modify or create derivative works of the Software, except as expressly granted in Section 1.1 (“License Grant to You”);
|
||||
|
||||
2.5. Change or remove copyright or other proprietary notices in the Software;
|
||||
|
||||
2.6. Bypass, disable, or circumvent any technical limitation, encryption, security, digital rights management or authentication mechanism in the Software;
|
||||
|
||||
2.7. Use the Software or Derivatives in any manner that would cause them to become subject to an open source software license, subject to the terms in Section 6 (“Components Under Other Licenses”);
|
||||
|
||||
2.8. Use the Software or Derivatives in violation of any applicable law or regulation in relevant jurisdictions
|
||||
|
||||
2.9. Indicate that a product or service developed with the Software or Derivatives is sponsored or endorsed by NVIDIA;
|
||||
|
||||
2.10. Replace any NVIDIA software components in the Software that are governed by this Agreement with other software that implements NVIDIA APIs;
|
||||
|
||||
2.11. Reverse engineer, decompile or disassemble any portion of the output generated using Software elements for the purpose of translating such output artifacts to target a non-NVIDIA platform; or
|
||||
|
||||
3. Authorized Users
|
||||
|
||||
You may allow employees and contractors of your entity or of your subsidiary(ies), and for educational institutions also enrolled students, to internally access and use the Software as authorized by this Agreement from your secure network to perform the work authorized by this Agreement on your behalf.
|
||||
You are responsible for the compliance with the terms of this Agreement by your authorized users.
|
||||
Any act or omission that if committed by you would constitute a breach of this Agreement will be deemed to constitute a breach of this Agreement if committed by your authorized users.
|
||||
|
||||
4. Pre-Release
|
||||
|
||||
Software versions identified as alpha, beta, preview, early access or otherwise as pre-release (“Pre-Release”) may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability and reliability standards relative to NVIDIA commercial offerings.
|
||||
You use Pre-Release Software at your own risk. NVIDIA did not design or test the Software for use in production or business-critical systems.
|
||||
NVIDIA may choose not to make available a commercial version of Pre-Release Software.
|
||||
NVIDIA may also choose to abandon development and terminate the availability of Pre-Release Software at any time without liability.
|
||||
|
||||
5. Updates
|
||||
|
||||
NVIDIA may at any time and at its option, change, discontinue, or deprecate any part, or all, of the Software, or change or remove features or functionality, or make available patches, workarounds or other updates to the Software.
|
||||
Unless the updates are provided with their separate governing terms, they are deemed part of the Software licensed to you under this Agreement, and your continued use of the Software is deemed acceptance of such changes.
|
||||
|
||||
6. Components Under Other Licenses
|
||||
|
||||
The Software may include or be distributed with components provided with separate legal notices or terms that accompany the components, such as open source software licenses and other license terms (“Other Licenses”).
|
||||
The components are subject to the applicable Other Licenses, including any proprietary notices, disclaimers, requirements and extended use rights;
|
||||
except that this Agreement will prevail regarding the use of third-party open source software, unless a third-party open source software license requires its license terms to prevail.
|
||||
Open source software license means any software, data or documentation subject to any license identified as an open source license by the Open Source Initiative (http://opensource.org), Free Software Foundation (http://www.fsf.org) or other similar open source organization or listed by the Software Package Data Exchange (SPDX) Workgroup under the Linux Foundation (http://www.spdx.org).
|
||||
|
||||
7. Ownership
|
||||
|
||||
7.1. NVIDIA Ownership. The Software, including all intellectual property rights, is and will remain the sole and exclusive property of NVIDIA or its licensors.
|
||||
Except as expressly granted in this Agreement, (a) NVIDIA reserves all rights, interests and remedies in connection with the Software, and (b) no other license or right is granted to you by implication, estoppel or otherwise.
|
||||
|
||||
7.2. Your Ownership. Subject to the rights of NVIDIA and its suppliers in the Software, which continue to be licensed as stated in this Agreement, even when incorporated in your products or services, and the extent permitted by applicable law, as between you and NVIDIA, you hold all rights, title and interest in and to your products, services and Derivatives you develop as permitted in this Agreement including their respective intellectual property rights.
|
||||
|
||||
8. Feedback
|
||||
|
||||
You may, but you are not obligated to, provide suggestions, requests, fixes, modifications, enhancements, or other feedback regarding the Software (collectively, “Feedback”).
|
||||
Feedback, even if designated as confidential by you, will not create any confidentiality obligation for NVIDIA or its affiliates.
|
||||
If you provide Feedback, you grant NVIDIA, its affiliates and its designees a non-exclusive, perpetual, irrevocable, sublicensable, worldwide, royalty-free, fully paid-up and transferable license, under your intellectual property rights, to publicly perform, publicly display, reproduce, use, make, have made, sell, offer for sale, distribute (through multiple tiers of distribution), import, create derivative works of and otherwise commercialize and exploit the Feedback at NVIDIA’s discretion.
|
||||
|
||||
9. Termination
|
||||
|
||||
9.1. Termination. This Agreement will automatically terminate without notice from NVIDIA if you fail to comply with any of the terms in this Agreement or if you commence or participate in any legal proceeding against NVIDIA with respect to the Software.
|
||||
Additionally, either party may terminate this Agreement at any time with thirty (30) days’ advance written notice to the other party.
|
||||
|
||||
9.2. Effect of Termination. Upon any expiration or termination of this Agreement, you will promptly (a) stop using and return, delete or destroy NVIDIA confidential information and all Software received under this Agreement, and (b) delete or destroy Derivatives created under this Agreement, unless an authorized NVIDIA representative provides prior written approval that you may keep a copy of the Derivatives solely for archival purposes.
|
||||
Upon written request, you will certify in writing that you have complied with your obligations under this Section 9.2 (“Effect of Termination”).
|
||||
|
||||
9.3. Survival. Section 1.2 (“License Grant to NVIDIA”), Section 5 (“Updates”), Section 6 (“Components Under Other Licenses”), Section 7 (“Ownership”), Section 8 (“Feedback), Section 9.2 (“Effect of Termination”), Section 9.3 (“Survival”), Section 10 (“Disclaimer of Warranties”), Section 11 (“Limitation of Liability”), Section 12 (“Use in Mission Critical Applications”), Section 13 (“Governing Law and Jurisdiction”), Section 14 (“Indemnity”) and Section 15 (“General”) will survive any expiration or termination of this Agreement.
|
||||
|
||||
10. Disclaimer of Warranties
|
||||
|
||||
THE SOFTWARE IS PROVIDED BY NVIDIA AS-IS AND WITH ALL FAULTS. TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA DISCLAIMS ALL WARRANTIES AND REPRESENTATIONS OF ANY KIND, WHETHER
|
||||
EXPRESS, IMPLIED OR STATUTORY, RELATING TO OR ARISING UNDER THIS AGREEMENT, INCLUDING, WITHOUT LIMITATION, THE WARRANTIES OF TITLE, NONINFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, USAGE OF TRADE AND COURSE OF DEALING. NVIDIA DOES NOT WARRANT OR ASSUME RESPONSIBILITY FOR THE ACCURACY OR COMPLETENESS OF ANY THIRD-PARTY INFORMATION, TEXT, GRAPHICS, LINKS CONTAINED IN THE SOFTWARE.
|
||||
WITHOUT LIMITING THE FOREGOING, NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS, ANY DEFECTS OR ERRORS WILL BE CORRECTED, ANY CERTAIN CONTENT WILL BE AVAILABLE; OR THAT THE SOFTWARE IS FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS. NO INFORMATION OR ADVICE GIVEN BY NVIDIA WILL IN ANY WAY INCREASE THE SCOPE OF ANY WARRANTY EXPRESSLY PROVIDED IN THIS AGREEMENT.
|
||||
NVIDIA does not warrant or assume responsibility for the accuracy or completeness of any third-party information, text, graphics or links contained in the Software.
|
||||
|
||||
11. Limitations of Liability
|
||||
|
||||
11.1. EXCLUSIONS. TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL NVIDIA BE LIABLE FOR ANY (I) INDIRECT, PUNITIVE, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, OR (ii) DAMAGES FOR (a) THE COST OF PROCURING SUBSTITUTE GOODS, OR (b) LOSS OF PROFITS, REVENUES, USE, DATA OR GOODWILL ARISING OUT OF OR RELATED TO THIS AGREEMENT, WHETHER BASED ON BREACH OF CONTRACT, TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY, OR OTHERWISE, AND EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES AND EVEN IF A PARTY’S REMEDIES FAIL THEIR ESSENTIAL PURPOSE.
|
||||
|
||||
11.2. DAMAGES CAP. ADDITIONALLY, TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA’S TOTAL CUMULATIVE AGGREGATE LIABILITY FOR ANY AND ALL LIABILITIES, OBLIGATIONS OR CLAIMS ARISING OUT OF OR RELATED TO THIS AGREEMENT WILL NOT EXCEED FIVE U.S. DOLLARS (US$5).
|
||||
|
||||
12. Use in Mission Critical Applications
|
||||
|
||||
You acknowledge that the Software provided under this Agreement is not designed or tested by NVIDIA for use in any system or application where the use or failure of such system or application developed with NVIDIA’s Software could result in injury, death or catastrophic damage (each, a “Mission Critical Application”).
|
||||
Examples of Mission Critical Applications include use in avionics, navigation, autonomous vehicle applications, AI solutions for automotive products, military, medical, life support or other mission-critical or life-critical applications.
|
||||
NVIDIA will not be liable to you or any third party, in whole or in part, for any claims or damages arising from these uses.
|
||||
You are solely responsible for ensuring that systems and applications developed with the Software include sufficient safety and redundancy features and comply with all applicable legal and regulatory standards and requirements.
|
||||
|
||||
13. Governing Law and Jurisdiction
|
||||
|
||||
This Agreement will be governed in all respects by the laws of the United States and the laws of the State of Delaware, without regard to conflict of laws principles or the United Nations Convention on Contracts for the International Sale of Goods.
|
||||
The state and federal courts residing in Santa Clara County, California will have exclusive jurisdiction over any dispute or claim arising out of or related to this Agreement, and the parties irrevocably consent to personal jurisdiction and venue in those courts;
|
||||
except that either party may apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction.
|
||||
|
||||
14. Indemnity
|
||||
|
||||
By using the Software you agree to defend, indemnify and hold harmless NVIDIA and its affiliates and their respective officers, directors, employees and agents from and against any claims, disputes, demands, liabilities, damages, losses, costs and expenses arising out of or in any way connected with (i) products or services that have been developed or deployed with or use the Software, or claims that they violate laws, or infringe, violate, or misappropriate any third party right;
|
||||
or (ii) use of the Software in breach of the terms of this Agreement.
|
||||
|
||||
15. General
|
||||
|
||||
15.1. Independent Contractors.
|
||||
The parties are independent contractors, and this Agreement does not create a joint venture, partnership, agency, or other form of business association between the parties.
|
||||
Neither party will have the power to bind the other party or incur any obligation on its behalf without the other party’s prior written consent.
|
||||
Nothing in this Agreement prevents either party from participating in similar arrangements with third parties.
|
||||
|
||||
15.2. No Assignment.
|
||||
NVIDIA may assign, delegate or transfer its rights or obligations under this Agreement by any means or operation of law.
|
||||
You may not, without NVIDIA’s prior written consent, assign, delegate or transfer any of your rights or obligations under this Agreement by any means or operation of law, and any attempt to do so is null and void.
|
||||
|
||||
15.3. No Waiver.
|
||||
No failure or delay by a party to enforce any term or obligation of this Agreement will operate as a waiver by that party, or prevent the enforcement of such term or obligation later.
|
||||
|
||||
15.4. Trade Compliance.
|
||||
You agree to comply with all applicable export, import, trade and economic sanctions laws and regulations, as amended, including without limitation U.S. Export Administration Regulations and Office of Foreign Assets Control regulations.
|
||||
You confirm (a) your understanding that export or reexport of certain NVIDIA products or technologies may require a license or other approval from appropriate authorities and (b) that you will not export or reexport any products or technology, directly or indirectly, without first obtaining any required license or other approval from appropriate authorities, (i) to any countries that are subject to any U.S. or local export restrictions (currently including, but not necessarily limited to, Belarus, Cuba, Iran, North Korea, Russia, Syria, the Region of Crimea, Donetsk People’s Republic Region and Luhansk People’s Republic Region);
|
||||
(ii) to any end-user who you know or have reason to know will utilize them in the design, development or production of nuclear, chemical or biological weapons, missiles, rocket systems, unmanned air vehicles capable of a maximum range of at least 300 kilometers, regardless of payload, or intended for military end-use, or any weapons of mass destruction;
|
||||
(iii) to any end-user who has been prohibited from participating in the U.S. or local export transactions by any governing authority;
|
||||
or (iv) to any known military or military-intelligence end-user or for any known military or military-intelligence end-use in accordance with U.S. trade compliance laws and regulations.
|
||||
|
||||
15.5. Government Rights.
|
||||
The Software, documentation and technology (“Protected Items”) are “Commercial products” as this term is defined at 48 C.F.R.
|
||||
2.101, consisting of “commercial computer software” and “commercial computer software documentation” as such terms are used in, respectively, 48 C.F.R.
|
||||
12.212 and 48 C.F.R. 227.7202 & 252.227-7014(a)(1). Before any Protected Items are supplied to the U.S. Government, you will (i) inform the U.S. Government in writing that the Protected Items are and must be treated as commercial computer software and commercial computer software documentation developed at private expense;
|
||||
(ii) inform the U.S. Government that the Protected Items are provided subject to the terms of the Agreement;
|
||||
and (iii) mark the Protected Items as commercial computer software and commercial computer software documentation developed at private expense.
|
||||
In no event will you permit the U.S. Government to acquire rights in Protected Items beyond those specified in 48 C.F.R.
|
||||
52.227-19(b)(1)-(2) or 252.227-7013(c) except as expressly approved by NVIDIA in writing.
|
||||
|
||||
15.6. Notices.
|
||||
Please direct your legal notices or other correspondence to legalnotices@nvidia.com with a copy mailed to NVIDIA Corporation, 2788 San Tomas Expressway, Santa Clara, California 95051, United States of America, Attention: Legal Department.
|
||||
If NVIDIA needs to contact you, you consent to receive the notices by email and agree that such notices will satisfy any legal communication requirements.
|
||||
|
||||
15.7. Severability.
|
||||
If a court of competent jurisdiction rules that a provision of this Agreement is unenforceable, that provision will be deemed modified to the extent necessary to make it enforceable and the remainder of this Agreement will continue in full force and effect.
|
||||
|
||||
15.8. Amendment.
|
||||
Any amendment to this Agreement must be in writing and signed by authorized representatives of both parties.
|
||||
|
||||
15.9. Construction.
|
||||
The headings in the Agreement are included solely for convenience and are not intended to affect the meaning or interpretation of the Agreement.
|
||||
As required by the context of the Agreement, the singular of a term includes the plural and vice versa.
|
||||
|
||||
15.10. Force Majeure.
|
||||
Neither party will be liable during any period where an event or circumstance prevents or delays that party from performing its obligations under this Agreement and that event or circumstance: (i) is not within the reasonable control of that party and is not the result of that party’s negligence, and (ii) cannot be overcome or avoided by that party using reasonably diligent efforts.
|
||||
|
||||
15.11. Entire Agreement.
|
||||
Regarding the subject matter of this Agreement, the parties agree that (a) this Agreement constitutes the entire and exclusive agreement between the parties and supersedes all prior and contemporaneous communications and (b) any additional or different terms or conditions, whether contained in purchase orders, order acknowledgments, invoices or otherwise, will not be binding and are null and void.
|
||||
|
||||
(v. May 8, 2025)
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Certain files within this repository are subject to separate licensing terms:
|
||||
|
||||
- The files located in the `python/CuTeDSL` directory are licensed under the
|
||||
NVIDIA End User License Agreement (EULA). Please refer to
|
||||
https://docs.nvidia.com/cutlass/media/docs/pythonDSL/license.html
|
||||
for the full terms.
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
# Publications Using Cutlass
|
||||
|
||||
## 2025
|
||||
|
||||
- ["Comet: Fine-grained Computation-communication Overlapping for Mixture-of-Experts"](https://arxiv.org/abs/2502.19811). Shulai Zhang, Ningxin Zheng, Haibin Lin, Ziheng Jiang, Wenlei Bao, Chengquan Jiang, Qi Hou, Weihao Cui, Size Zheng, Li-Wen Chang, Quan Chen, Xin Liu. _arXiv_, February 2025.
|
||||
|
||||
- ["ParetoQ: Scaling Laws in Extremely Low-bit LLM Quantization"](https://arxiv.org/abs/2502.02631). Zechun Liu, Changsheng Zhao, Hanxian Huang, Sijia Chen, Jing Zhang, Jiawei Zhao, Scott Roy, Lisa Jin, Yunyang Xiong, Yangyang Shi, Lin Xiao, Yuandong Tian, Bilge Soran, Raghuraman Krishnamoorthi, Tijmen Blankevoort, Vikas Chandra. _arXiv_, February 2025.
|
||||
|
||||
- ["Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light"](https://arxiv.org/abs/2504.16922). Ali Hassani, Fengzhe Zhou, Aditya Kane, Jiannan Huang, Chieh-Yun Chen, Min Shi, Steven Walton, Markus Hoehnerbach, Vijay Thakkar, Michael Isaev, Qinsheng Zhang, Bing Xu, Haicheng Wu, Wen-mei Hwu, Ming-Yu Liu, Humphrey Shi. _arXiv_, April 2025.
|
||||
|
||||
## 2024
|
||||
|
||||
- ["DeepSeek-V3 Technical Report"](https://arxiv.org/abs/2412.19437). DeepSeek-AI. _arXiv_, December 2024.
|
||||
|
||||
- ["ShadowKV: KV Cache in Shadows for High-Throughput Long-Context LLM Inference"](https://arxiv.org/abs/2410.21465). Hanshi Sun, Li-Wen Chang, Wenlei Bao, Size Zheng, Ningxin Zheng, Xin Liu, Harry Dong, Yuejie Chi, Beidi Chen. _arXiv_, October 2024.
|
||||
|
||||
- ["FLUX: Fast Software-based Communication Overlap On GPUs Through Kernel Fusion"](https://arxiv.org/abs/2406.06858). Li-Wen Chang, Wenlei Bao, Qi Hou, Chengquan Jiang, Ningxin Zheng, Yinmin Zhong, Xuanrun Zhang, Zuquan Song, Chengji Yao, Ziheng Jiang, Haibin Lin, Xin Jin, Xin Liu. _arXiv_, June 2024.
|
||||
|
||||
- ["EVT: Accelerating Deep Learning Training with Epilogue Visitor Tree"](https://dl.acm.org/doi/10.1145/3620666.3651369). Zhaodong Chen, Andrew Kerr, Richard Cai, Jack Kosaian, Haicheng Wu, Yufei Ding, and Yuan Xie. _Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems_, April 2024.
|
||||
|
||||
- ["Faster Neighborhood Attention: Reducing the O(n^2) Cost of Self Attention at the Threadblock Level"](https://arxiv.org/abs/2403.04690). Ali Hassani, Wen-Mei Hwu, Humphrey Shi. _arXiv_, March 2024.
|
||||
|
||||
## 2023
|
||||
|
||||
- ["A Case Study in CUDA Kernel Fusion: Implementing FlashAttention-2 on NVIDIA Hopper Architecture using the CUTLASS Library"](https://arxiv.org/abs/2312.11918). Ganesh Bikshandi, Jay Shah. _arXiv_, December 2023.
|
||||
|
||||
- ["Benchmarking GPU Tensor Cores on General Matrix Multiplication Kernels through CUTLASS"](https://www.mdpi.com/2076-3417/13/24/13022). Xuanteng Huang, Xianwei Zhang, Panfei Yang, Nong Xiao. _Journal of Applied Sciences_, December 2023.
|
||||
|
||||
- ["A Speed Odyssey for Deployable Quantization of LLMs"](https://arxiv.org/abs/2311.09550). Qingyuan Li, Ran Meng, Yiduo Li, Bo Zhang, Liang Li, Yifan Lu, Xiangxiang Chu, Yerui Sun, Yuchen Xie. _arXiv_, November 2023.
|
||||
|
||||
- ["FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning"](https://arxiv.org/abs/2307.08691). Tri Dao. _Technical Report_, July 2023.
|
||||
|
||||
- ["MegaBlocks: Efficient Sparse Training with Mixture-of-Experts"](https://arxiv.org/abs/2211.15841). Trevor Gale, Deepak Narayanan, Cliff Young, Matei Zaharia. _Proceedings of the Sixth Machine Learning and Systems_, May 2023.
|
||||
|
||||
- ["ByteTransformer: A High-Performance Transformer Boosted for Variable-Length Inputs"](https://arxiv.org/abs/2210.03052). Yujia Zhai, Chengquan Jiang, Leyuan Wang, Xiaoying Jia, Shang Zhang, Zizhong Chen, Xin Liu, Yibo Zhu. _Proceedings of the 37th IEEE International Parallel & Distributed Processing Symposium (Best Paper)_, May 2023.
|
||||
|
||||
- ["A Framework for Fine-Grained Synchronization of Dependent GPU Kernels"](https://arxiv.org/abs/2305.13450). Abhinav Jangda, Saeed Maleki, Maryam Mehri Dehnavi, Madan Musuvathi, Olli Saarikivi. _Computing Research Repository_, May 2023.
|
||||
|
||||
- ["Graphene: An IR for Optimized Tensor Computations on GPUs"](https://dl.acm.org/doi/pdf/10.1145/3582016.3582018). Hagedorn, Bastian, Bin Fan, Hanfeng Chen, Cris Cecka, Michael Garland, Vinod Grover. _Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems_, March 2023.
|
||||
|
||||
- ["Mixed Precision Post Training Quantization of Neural Networks with Sensitivity Guided Search"](https://arxiv.org/abs/2302.01382). Clemens JS Schaefer, Elfie Guo, Caitlin Stanton, Xiaofan Zhang, Tom Jablin, Navid Lambert-Shirzad, Jian Li, Chiachen Chou, Siddharth Joshi, Yu Emma Wang. _arXiv_, February 2023.
|
||||
|
||||
- ["Dynamic N:M Fine-Grained Structured Sparse Attention Mechanism"](https://dl.acm.org/doi/abs/10.1145/3572848.3577500). Zhaodong Chen, Zheng Qu, Yuying Quan, Liu Liu, Yufei Ding, Yuan Xie. _Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming_, February 2023.
|
||||
|
||||
- ["Stream-K: Work-centric Parallel Decomposition for Dense Matrix-Matrix Multiplication on the GPU"](https://arxiv.org/abs/2301.03598). Muhammad Osama, Duane Merrill, Cris Cecka, Michael Garland, John D. Owens. _arXiv_, January 2023.
|
||||
|
||||
## 2022
|
||||
|
||||
- ["GPU Load Balancing"](https://arxiv.org/abs/2212.08964). Muhammad Osama. _Doctoral dissertation, University of California, Davis_, December 2022.
|
||||
|
||||
- ["Who Says Elephants Can't Run: Bringing Large Scale MoE Models into Cloud Scale Production"](https://arxiv.org/abs/2211.10017). Young Jin Kim, Rawn Henry, Raffy Fahim, Hany Hassan Awadalla. _Proceedings of the Third Workshop on Simple and Efficient Natural Language Processing_, December 2022.
|
||||
|
||||
- ["Bolt: Bridging the Gap between Auto-tuners and Hardware-native Performance"](https://arxiv.org/abs/2110.15238). Jiarong Xing, Leyuan Wang, Shang Zhang, Jack Chen, Ang Chen, Yibo Zhu. _Proceedings of the 5th MLSys Conference_, August 2022.
|
||||
|
||||
- ["Recovering single precision accuracy from Tensor Cores while surpassing the FP32 theoretical peak performance"](https://arxiv.org/abs/2203.03341). Hiroyuki Ootomo, Rio Yokota. _International Journal of High Performance Computing_, March 2022.
|
||||
|
||||
- ["Breaking the Computation and Communication Abstraction Barrier in Distributed Machine Learning Workloads"](https://arxiv.org/abs/2105.05720). Abhinav Jangda, Jun Huang, Guodong Liu, Amir Hossein Nodehi Sabet, Saeed Maleki, Youshan Miao, Madanlal Musuvathi, Todd Mytkowicz, Olli Saarikivi. _Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems_, February 2022.
|
||||
|
||||
## 2021
|
||||
|
||||
- ["Arithmetic-intensity-guided fault tolerance for neural network inference on GPUs"](https://dl.acm.org/doi/abs/10.1145/3458817.3476184). Jack Kosaian, K. V. Rashmi. _Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis_, November 2021.
|
||||
|
||||
- ["Real-time Neural Radiance Caching for Path Tracing"](https://dl.acm.org/doi/abs/10.1145/3450626.3459812). Thomas Muller, Fabrice Rousselle, Jan Novak, Alex Keller. _ACM Trans. Graph._, August 2021.
|
||||
|
||||
## 2020
|
||||
|
||||
- ["Scalable Knowledge Graph Analytics at 136 Petaflop/s"](https://www.computer.org/csdl/proceedings-article/sc/2020/999800a061/1oeORDgCM0g). Ramakrishnan Kannan, Piyush Sao, Hao Lu, Drahomira Herrmannova, Vijay Thakkar, Robert Patton, Richard Vuduc, Thomas Potok. _Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis_, November 2020.
|
||||
|
||||
- ["Accelerating Sparse DNN Models without Hardware-Support via Tile-Wise Sparsity"](https://arxiv.org/abs/2008.13006). Cong Guo, Bo Yang Hsueh, Jingwen Leng, Yuxian Qiu, Yue Guan, Zehuan Wang, Xiaoying Jia, Xipeng Li, Minyi Guo, Yuhao Zhu. _Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis_, November 2020.
|
||||
|
||||
- ["Strassen's Algorithm Reloaded on GPUs"](https://dl.acm.org/doi/10.1145/3372419). Jianyu Huang, Chenhan D. Yu, Robert A. van de Geijn. _ACM Transactions on Mathematical Software_, March 2020.
|
||||
|
||||
## Copyright
|
||||
|
||||
Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
```
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
```
|
||||
|
|
@ -0,0 +1,643 @@
|
|||

|
||||
# Overview
|
||||
|
||||
# CUTLASS 4.2.0
|
||||
|
||||
_CUTLASS 4.2.0 - Aug 2025_
|
||||
|
||||
CUTLASS is a collection of abstractions for implementing high-performance matrix-matrix multiplication (GEMM)
|
||||
and related computations at all levels and scales within CUDA. It incorporates strategies for
|
||||
hierarchical decomposition and data movement. CUTLASS decomposes these "moving parts" into reusable, modular
|
||||
software components and abstractions.
|
||||
|
||||
Primitives for different levels of a conceptual parallelization hierarchy can be specialized and tuned
|
||||
via custom tiling sizes, data types, and other algorithmic policy. The resulting flexibility simplifies
|
||||
their use as building blocks within custom kernels and applications.
|
||||
|
||||
CUTLASS has been providing CUDA C++ template abstractions for high-performance linear algebra since 2017 and
|
||||
these abstractions provide extensive support for a wide range of computations including
|
||||
mixed-precision computations, specialized data-movement (async copy) and
|
||||
multiply-accumulate abstractions for FP64, FP32, TF32, FP16, BF16,
|
||||
[FP32 emulation via tensor core instruction](https://github.com/NVIDIA/cutlass/tree/main/examples/27_ampere_3xtf32_fast_accurate_tensorop_gemm),
|
||||
8b floating point types (e5m2 and e4m3),
|
||||
block scaled data types (NVIDIA NVFP4 and OCP standard MXFP4, MXFP6, MXFP8),
|
||||
narrow integer types (4 and 8b signed and unsigned integers),
|
||||
and binary 1b data types (where architectures allow for the
|
||||
native support of such data types) across NVIDIA's Volta, Turing, Ampere, Ada, Hopper, and Blackwell architectures.
|
||||
|
||||
To this rich ecosystem of C++ based kernel programming abstractions, CUTLASS 4 adds CUTLASS DSLs. These are Python native interfaces for writing high-performance CUDA kernels based on core CUTLASS and CuTe concepts without any performance compromises. This allows for a much smoother learning curve, orders of magnitude faster compile times, native integration with DL frameworks without writing glue code, and much more intuitive metaprogramming that does not require deep C++ expertise.
|
||||
|
||||
Overall we envision CUTLASS DSLs as a family of domain-specific languages (DSLs). With the release of 4.0, we are releasing the first of these in CuTe DSL. This is a low level programming model that is fully consistent with CuTe C++ abstractions — exposing core concepts such as layouts, tensors, hardware atoms, and full control over the hardware thread and data hierarchy.
|
||||
|
||||
CuTe DSL demonstrates optimal matrix multiply and other linear algebra operations
|
||||
targeting the programmable, high-throughput _Tensor Cores_ implemented by
|
||||
NVIDIA's Ampere, Hopper, and Blackwell architectures.
|
||||
|
||||
We believe it will become an indispensable tool for students, researchers, and performance
|
||||
engineers alike — flattening the learning curve of GPU programming, rapidly prototyping kernel
|
||||
designs, and bringing optimized solutions into production.
|
||||
|
||||
CuTe DSL is currently in public beta and will graduate out of beta by end of summer 2025.
|
||||
|
||||
To get started quickly, please refer to:
|
||||
- [CUTLASS C++ Quick Start Guide](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html).
|
||||
- [CuTe DSL Quick Start Guide](https://docs.nvidia.com/cutlass/media/docs/pythonDSL/quick_start.html).
|
||||
|
||||
# What's New in CUTLASS 4.2
|
||||
|
||||
## CuTe DSL
|
||||
* We will likely be skipping 4.2.dev release and directly target 4.2.
|
||||
* CuTeDSL version remains at 4.1.0 till then.
|
||||
|
||||
## CUTLASS C++
|
||||
* Add K major scale factor support for Hopper SM90 blockwise kernels.
|
||||
* Further enhance Blackwell SM100 Attention kernels in [example 77](https://github.com/NVIDIA/cutlass/tree/main/examples/77_blackwell_fmha/).
|
||||
- Add fused reduction kernel support for cutlass MLA.
|
||||
- Fix an issue where `get_unmasked_trip_count` may return a negative value.
|
||||
- Fix an issue where mbarriers are initialized with a zero arrival count.
|
||||
* Add Blackwell SM120 blockwise gemm kernel example: [example 87](https://github.com/NVIDIA/cutlass/tree/main/examples/87_blackwell_geforce_gemm_blockwise/).
|
||||
* Support for Blackwell SM100 cpasync kernel.
|
||||
- Collective mainloop codes: [cpasync mainloop](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_mma_cpasync_warpspecialized.hpp).
|
||||
- Kernel codes: [cpasync kernel](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_gemm_cpasync_warpspecialized.hpp).
|
||||
* Support for Blackwell SM121 kernels for DGX Spark GPUs.
|
||||
- Share the major codes with Blackwell SM120 kernels.
|
||||
* Support for Blackwell SM100 legacy mixed input GEMM kernels.
|
||||
- Collective mainloop codes: [Mixed input mainloop](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/collective/sm100_mma_warpspecialized_mixed_input.hpp).
|
||||
- Kernel codes: [Mixed input kernel](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/sm100_gemm_tma_warpspecialized_mixed_input_transform.hpp).
|
||||
- Example codes: [example 86](https://github.com/NVIDIA/cutlass/tree/main/examples/86_blackwell_mixed_dtype_gemm/).
|
||||
* Support for Blackwell SM100 fp4 gemv kernels.
|
||||
- Kernel codes: [Gemv kernel](https://github.com/NVIDIA/cutlass/tree/main/include/cutlass/gemm/kernel/gemv_blockscaled.h).
|
||||
- Example codes: [example 91](https://github.com/NVIDIA/cutlass/tree/main/examples/91_fp4_gemv/)
|
||||
* From CUDA 13.0, the Blackwell SM101 for Thor GPUs is renamed to SM110.
|
||||
- For CUDA toolkit version < 13.0, SM101 is still used for Thor GPUs.
|
||||
- For CUDA toolkit version >= 13.0, SM110 is used for Thor GPUs and SM101 is no longer valid.
|
||||
* CuTe changes:
|
||||
- Fix inaccurate GridDim calculation under [CuTe tutorial](https://github.com/NVIDIA/cutlass/tree/main/examples/cute/tutorial/blackwell/).
|
||||
- Add [movmatrix](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-movmatrix) support.
|
||||
- Fix smallest MMA-N allowed for Blackwell fp8 and fp16 gemm kernels.
|
||||
- Support fp16 accumulator for sm89 fp8 mma.
|
||||
- Shorten `nullspace` implementation.
|
||||
- Isolate and comment on `cosize` hacks.
|
||||
- Important documentation correction: `E<0,1> == 1@0@1`.
|
||||
* Add support for heuristics-based kernel filtering and autotuning using `nvidia-matmul-heuristics`.
|
||||
- For details, please refer to the [heuristics doc](https://github.com/NVIDIA/cutlass/tree/main/media/docs/cpp/heuristics.md).
|
||||
* Rename legacy Python API package from `cutlass` to `cutlass_cppgen`.
|
||||
* Fix some profiler issues:
|
||||
- Modify default cluster callback values to non-zero to avoid profiler failure when these values are not set on the command line.
|
||||
- Fix some no output and timeout issues.
|
||||
* Add following unit tests:
|
||||
- [fp16 accumulator for sm89 fp8 mma](https://github.com/NVIDIA/cutlass/tree/main/test/unit/cute/ampere/cooperative_gemm.cu)
|
||||
- [movmatrix test](https://github.com/NVIDIA/cutlass/tree/main/test/unit/cute/turing/movm.cu)
|
||||
- [fp16 narrow mma n](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm100_tensorop_gemm/f16_f16_void_f32_narrow_mma_n.cu) and [fp8 narrow mma n](https://github.com/NVIDIA/cutlass/tree/main/test/unit/gemm/device/sm100_tensorop_gemm/f8_f8_void_bf16_narrow_mma_n.cu)
|
||||
|
||||
Note: CUTLASS 4.x builds are known to be down on Windows platforms for all CUDA toolkits.
|
||||
CUTLASS team is working on a fix.
|
||||
|
||||
**See the [CHANGELOG](https://docs.nvidia.com/cutlass/CHANGELOG.html) for details of all past releases and updates.**
|
||||
|
||||
# Performance
|
||||
|
||||
CUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels,
|
||||
they exhibit nearly optimal utilization of peak theoretical throughput. The figure below
|
||||
shows CUTLASS 3.8's performance as a % of theoretical peak utilization
|
||||
on various input and output data types when run on NVIDIA Blackwell SM100 architecture GPU.
|
||||
|
||||

|
||||
|
||||
The two figures below show the continual CUTLASS performance improvements
|
||||
on an [NVIDIA H100](https://www.nvidia.com/en-us/data-center/h100/) (NVIDIA Hopper architecture) since
|
||||
CUTLASS 3.1.
|
||||
CUTLASS 3.5.1 was compiled with the [CUDA 12.5u1 Toolkit](https://developer.nvidia.com/cuda-downloads).
|
||||
Tensor Core operations are implemented using CUDA's
|
||||
[mma](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma) and
|
||||
[wgmma](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions) instructions.
|
||||
|
||||

|
||||

|
||||
|
||||
# CuTe
|
||||
|
||||
CUTLASS 3.0 introduced a new core library, CuTe, to describe and manipulate tensors of threads and data.
|
||||
CuTe is a collection of C++ CUDA template abstractions for
|
||||
defining and operating on hierarchically multidimensional layouts of threads and data.
|
||||
CuTe provides `Layout` and `Tensor` objects that compactly package the type,
|
||||
shape, memory space, and layout of data, while performing the complicated indexing for the user.
|
||||
This lets programmers focus on the logical descriptions of their algorithms while
|
||||
CuTe does the mechanical bookkeeping for them. With these tools, we can quickly design,
|
||||
implement, and modify all dense linear algebra operations.
|
||||
|
||||
The core abstractions of CuTe are hierarchically multidimensional layouts
|
||||
which can be composed with data arrays to represent tensors.
|
||||
The representation of layouts is powerful enough to represent nearly
|
||||
everything we need to implement efficient dense linear algebra.
|
||||
Layouts can also be combined and manipulated via functional composition, on which we build a large set of common operations such as tiling and partitioning.
|
||||
|
||||
CUTLASS 3.0 and beyond adopts CuTe throughout the GEMM hierarchy in its templates.
|
||||
This greatly simplifies the design and improves code composability and readability.
|
||||
More documentation specific to CuTe can be found in its
|
||||
[dedicated documentation directory](https://docs.nvidia.com/cutlass/media/docs/cpp/cute/00_quickstart.html).
|
||||
|
||||
# Compatibility
|
||||
|
||||
Minimum requirements:
|
||||
|
||||
- Architecture: Volta (compute capability 7.0)
|
||||
- Compiler: Must support at least C++17
|
||||
- CUDA Toolkit version: 11.4
|
||||
|
||||
CUTLASS requires a C++17 host compiler and
|
||||
performs best when built with the [**CUDA 12.8 Toolkit**](https://developer.nvidia.com/cuda-downloads).
|
||||
It is also compatible with CUDA 11.4, CUDA 11.5, CUDA 11.6, CUDA 11.7, CUDA 11.8, and all other CUDA 12.x versions.
|
||||
|
||||
## Operating Systems
|
||||
|
||||
We have tested the following environments.
|
||||
|
||||
|**Operating System** | **Compiler** |
|
||||
|-----------------|----------|
|
||||
| Ubuntu 18.04 | GCC 7.5.0 |
|
||||
| Ubuntu 20.04 | GCC 10.3.0 |
|
||||
| Ubuntu 22.04 | GCC 11.2.0 |
|
||||
|
||||
Note: GCC 8.5.0 has known regressions regarding fold expressions and overloaded operators. Using GCC 7.5.0 or (preferred) GCC >= 9 is recommended.
|
||||
|
||||
Note: CUTLASS 3.x builds are known to be down on Windows platforms for all CUDA toolkits.
|
||||
CUTLASS team is working on a fix.
|
||||
|
||||
## Hardware
|
||||
|
||||
CUTLASS runs successfully on the following NVIDIA GPUs, and it is expected to be efficient on Volta, Turing, Ampere, Ada, Hopper, and Blackwell architecture based NVIDIA GPUs.
|
||||
|
||||
|**GPU**|**CUDA Compute Capability**|**Minimum CUDA Toolkit Required by CUTLASS-3**|
|
||||
|---|---|---|
|
||||
|NVIDIA V100 Tensor Core GPU |7.0|11.4|
|
||||
|NVIDIA TitanV |7.0|11.4|
|
||||
|NVIDIA GeForce RTX 20x0 series |7.5|11.4|
|
||||
|NVIDIA T4 |7.5|11.4|
|
||||
|NVIDIA A100 Tensor Core GPU |8.0|11.4|
|
||||
|NVIDIA A10 |8.6|11.4|
|
||||
|NVIDIA GeForce RTX 30x0 series |8.6|11.4|
|
||||
|NVIDIA GeForce RTX 40x0 series |8.9|11.8|
|
||||
|NVIDIA L40 |8.9|11.8|
|
||||
|NVIDIA H100 Tensor Core GPU |9.0|11.8|
|
||||
|NVIDIA H200 Tensor Core GPU |9.0|11.8|
|
||||
|NVIDIA B200 Tensor Core GPU |10.0|12.8|
|
||||
|NVIDIA GeForce RTX 50x0 series |12.0|12.8|
|
||||
|
||||
## Target Architecture
|
||||
|
||||
In general, PTX code generated for one target architecture can be run on future architectures
|
||||
(i.e., it is forward compatible).
|
||||
However, CUDA 12.0 introduced the concept of "architecture-accelerated features" whose
|
||||
PTX does not have forward compatibility guarantees.
|
||||
Several Hopper and Blackwell PTX instructions fall under this category of
|
||||
architecture-accelerated features, and thus require a `sm_90a` or `sm_100a` target architecture
|
||||
(note the "a" appended). For more details on this and other architecture-accelerated instructions,
|
||||
please refer to the [CUDA Documentation](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#feature-availability).
|
||||
|
||||
The target architecture information is passed on to CUTLASS via the cmake flag
|
||||
`CUTLASS_NVCC_ARCHS`. In order to maximize performance on Hopper GH100,
|
||||
users are required to build CUTLASS with `90a` as the target architecture.
|
||||
If a user accidentally builds a kernel which uses SM90a features
|
||||
(e.g. Hopper Tensor Core Instructions), using the SM90 target
|
||||
(note the lack of "a"), with either CUDA Toolkit 12 or 11.8,
|
||||
the kernel is expected to fail with a runtime error.
|
||||
|
||||
```
|
||||
cmake .. -DCUTLASS_NVCC_ARCHS="90a"
|
||||
```
|
||||
Or
|
||||
|
||||
```
|
||||
cmake .. -DCUTLASS_NVCC_ARCHS="100a"
|
||||
```
|
||||
|
||||
Note: The NVIDIA Blackwell SM100 architecture used in the datacenter
|
||||
products has a different compute capability than the one underpinning
|
||||
NVIDIA Blackwell GeForce RTX 50 series GPUs (SM120). As a result, kernels
|
||||
compiled for Blackwell SM100 architecture with arch conditional features
|
||||
(using `sm_100a`) are not compatible with RTX 50 series GPUs.
|
||||
|
||||
Please refer to the [functionality documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/functionality.html)
|
||||
for details on which kernels require which target architectures.
|
||||
|
||||
# Documentation
|
||||
|
||||
CUTLASS is described in the following documents and the accompanying
|
||||
[Doxygen documentation](https://nvidia.github.io/cutlass).
|
||||
|
||||
- [Quick Start Guide](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html) - basics of building and running CUTLASS
|
||||
- [Functionality](https://docs.nvidia.com/cutlass/media/docs/cpp/functionality.html) - summarizes functionality available in CUTLASS
|
||||
- [Efficient GEMM in CUDA](https://docs.nvidia.com/cutlass/media/docs/cpp/efficient_gemm.html) - describes how GEMM kernels may be implemented efficiently in CUDA
|
||||
- [CUTLASS 3.x Design](https://docs.nvidia.com/cutlass/media/docs/cpp/cutlass_3x_design.html) - describes the CUTLASS 3.x design, its benefits, and how CuTe enables us to write much more composable components
|
||||
- [GEMM API 3.x](https://docs.nvidia.com/cutlass/media/docs/cpp/gemm_api_3x.html) - describes the CUTLASS 3.x GEMM model and C++ template concepts
|
||||
- [GEMM API 2.x](https://docs.nvidia.com/cutlass/media/docs/cpp/gemm_api.html) - describes the CUTLASS 2.x GEMM model and C++ template concepts
|
||||
- [Implicit GEMM Convolution](https://docs.nvidia.com/cutlass/media/docs/cpp/implicit_gemm_convolution.html) - describes 2-D and 3-D convolution in CUTLASS
|
||||
- [Code Organization](https://docs.nvidia.com/cutlass/media/docs/cpp/code_organization.html) - describes the organization and contents of the CUTLASS project
|
||||
- [Terminology](https://docs.nvidia.com/cutlass/media/docs/cpp/terminology.html) - describes terms used in the code
|
||||
- [Programming Guidelines](https://docs.nvidia.com/cutlass/media/docs/cpp/programming_guidelines.html) - guidelines for writing efficient modern CUDA C++
|
||||
- [Fundamental types](https://docs.nvidia.com/cutlass/media/docs/cpp/fundamental_types.html) - describes basic C++ classes used in CUTLASS to represent numeric quantities and arrays
|
||||
- [Layouts](https://docs.nvidia.com/cutlass/media/docs/cpp/layout.html) - describes layouts of matrices and tensors in memory
|
||||
- [Tile Iterators](https://docs.nvidia.com/cutlass/media/docs/cpp/tile_iterator_concept.html) - describes C++ concepts for iterating over tiles of matrices in memory
|
||||
- [CUTLASS Profiler](https://docs.nvidia.com/cutlass/media/docs/cpp/profiler.html) - command-line driven profiling application
|
||||
- [CUTLASS Utilities](https://docs.nvidia.com/cutlass/media/docs/cpp/utilities.html) - additional templates used to facilitate rapid development
|
||||
- [Dependent kernel launch](https://docs.nvidia.com/cutlass/media/docs/cpp/dependent_kernel_launch.html) - describes a new feature in Hopper which allows overlapping dependent
|
||||
kernels in the same stream, and how it is used in CUTLASS.
|
||||
|
||||
# Resources
|
||||
We have also described the structure of an efficient GEMM in our talk at the
|
||||
[GPU Technology Conference 2018](http://on-demand.gputechconf.com/gtc/2018/presentation/s8854-cutlass-software-primitives-for-dense-linear-algebra-at-all-levels-and-scales-within-cuda.pdf).
|
||||
|
||||
- [CUTLASS: Software Primitives for Dense Linear Algebra at All Levels and Scales within CUDA](https://www.nvidia.com/en-us/on-demand/session/gtcsiliconvalley2018-s8854/)
|
||||
- [Developing CUDA Kernels to Push Tensor Cores to the Absolute Limit on NVIDIA A100](https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21745/)
|
||||
- [Accelerating Convolution with Tensor Cores in CUTLASS](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s31883/)
|
||||
- [Accelerating Backward Data Gradient by Increasing Tensor Core Utilization in CUTLASS](https://www.nvidia.com/en-us/on-demand/session/gtcspring22-s41996/)
|
||||
- [CUTLASS: Python API, Enhancements, and NVIDIA Hopper](https://www.nvidia.com/en-us/on-demand/session/gtcfall22-a41131/)
|
||||
|
||||
# Building CUTLASS
|
||||
|
||||
CUTLASS is a header-only template library and does not need to be built to be used by other
|
||||
projects. Client applications should target CUTLASS's `include/` directory in their include
|
||||
paths.
|
||||
|
||||
CUTLASS unit tests, examples, and utilities can be built with CMake.
|
||||
The minimum version of CMake is given in the [Quickstart guide](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html).
|
||||
Make sure the `CUDACXX` environment variable points to NVCC in the CUDA Toolkit installed
|
||||
on your system.
|
||||
|
||||
```bash
|
||||
$ export CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc
|
||||
```
|
||||
|
||||
Create a build directory within the CUTLASS project, then run CMake. By default CUTLASS will build kernels
|
||||
for CUDA architecture versions 5.0, 6.0, 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, and 9.0.
|
||||
To reduce compile time you can specify
|
||||
the architectures to build CUTLASS for by changing the CMake configuration setting
|
||||
`CUTLASS_NVCC_ARCHS`.
|
||||
|
||||
```bash
|
||||
$ mkdir build && cd build
|
||||
|
||||
$ cmake .. -DCUTLASS_NVCC_ARCHS=80 # compiles for NVIDIA's Ampere Architecture
|
||||
```
|
||||
|
||||
From the `build/` directory, compile and run the CUTLASS unit tests by building the target `test_unit` with make.
|
||||
|
||||
The unit tests are organized as several binaries mirroring the top-level namespaces of CUTLASS,
|
||||
and they may be executed in parallel via make's `-j` command line argument.
|
||||
|
||||
```bash
|
||||
$ make test_unit -j
|
||||
...
|
||||
...
|
||||
...
|
||||
[----------] Global test environment tear-down
|
||||
[==========] 946 tests from 57 test cases ran. (10812 ms total)
|
||||
[ PASSED ] 946 tests.
|
||||
```
|
||||
|
||||
All tests should pass on supported platforms, though the exact number of tests may vary over time.
|
||||
|
||||
|
||||
# Project Structure
|
||||
|
||||
CUTLASS is arranged as a header-only library along with Utilities, Tools, Examples, and unit tests.
|
||||
[Doxygen documentation](https://nvidia.github.io/cutlass) provides a complete list of files, classes,
|
||||
and template concepts defined in the CUTLASS project.
|
||||
|
||||
A detailed explanation of the source code organization may be found in the
|
||||
[CUTLASS documentation](https://docs.nvidia.com/cutlass/media/docs/cpp/code_organization.html), but several main components are summarized below.
|
||||
|
||||
## CUTLASS Template Library
|
||||
|
||||
```
|
||||
include/ # client applications should target this directory in their build's include paths
|
||||
|
||||
cutlass/ # CUDA Templates for Linear Algebra Subroutines and Solvers - headers only
|
||||
|
||||
arch/ # direct exposure of architecture features (including instruction-level GEMMs)
|
||||
|
||||
conv/ # code specialized for convolution
|
||||
|
||||
epilogue/ # code specialized for the epilogue of gemm/convolution
|
||||
|
||||
gemm/ # code specialized for general matrix product computations
|
||||
|
||||
layout/ # layout definitions for matrices, tensors, and other mathematical objects in memory
|
||||
|
||||
platform/ # CUDA-capable Standard Library components
|
||||
|
||||
reduction/ # bandwidth-limited reduction kernels that do not fit the "gemm" model
|
||||
|
||||
thread/ # simt code that can be performed within a CUDA thread
|
||||
|
||||
transform/ # code specialized for layout, type, and domain transformations
|
||||
|
||||
* # core vocabulary types, containers, and basic numeric operations
|
||||
|
||||
cute/ # CuTe Layout, layout algebra, MMA/Copy atoms, tiled MMA/Copy
|
||||
|
||||
algorithm/ # Definitions of core operations such as copy, gemm, and operations on cute::tuples
|
||||
|
||||
arch/ # Bare bones PTX wrapper structs for copy and math instructions
|
||||
|
||||
atom/ # Meta-information either linked to or built from arch/ operators
|
||||
|
||||
mma_atom.hpp # cute::Mma_Atom and cute::TiledMma
|
||||
|
||||
copy_atom.hpp # cute::Copy_Atom and cute::TiledCopy
|
||||
|
||||
*sm*.hpp # Arch specific meta-information for copy and math operations
|
||||
|
||||
* # Core library types such as Shape, Stride, Layout, Tensor, and associated operations
|
||||
|
||||
```
|
||||
|
||||
### CUTLASS SDK Examples
|
||||
|
||||
[CUTLASS SDK examples](https://github.com/NVIDIA/cutlass/tree/main/examples) apply CUTLASS templates to implement basic computations.
|
||||
|
||||
### Tools
|
||||
|
||||
```
|
||||
tools/
|
||||
library/ # CUTLASS Instance Library - contains instantiations of all supported CUTLASS templates
|
||||
include/
|
||||
cutlass/
|
||||
library/
|
||||
|
||||
profiler/ # CUTLASS Profiler - command-line utility for executing operations in the
|
||||
# CUTLASS Library
|
||||
|
||||
util/ # CUTLASS Utilities - contains numerous helper classes for
|
||||
include/ # managing tensors in device memory, reference
|
||||
cutlass/ # implementations for GEMM, random initialization
|
||||
util/ # of tensors, and I/O.
|
||||
```
|
||||
|
||||
### Test
|
||||
|
||||
The `test/unit/` directory consists of unit tests implemented with Google Test that demonstrate
|
||||
basic usage of Core API components and complete tests of the CUTLASS GEMM computations.
|
||||
|
||||
Instructions for building and running the Unit tests are described in the [Quickstart guide](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html).
|
||||
|
||||
# Performance Profiling
|
||||
|
||||
The `tools/profiler/` directory contains a command-line utility for launching each of the GEMM kernels.
|
||||
It can be built as follows:
|
||||
|
||||
```bash
|
||||
$ make cutlass_profiler -j16
|
||||
```
|
||||
## Building all GEMM and Convolution kernels (_long_ build times)
|
||||
|
||||
By default, only one tile size is instantiated for each data type, math instruction, and layout.
|
||||
To instantiate all, set the following CMake variable when running CMake from an empty `build/` directory.
|
||||
Beware, this results in *tens of thousands* of kernels and long build times.
|
||||
This would also result in a large binary size and, on some platforms, cause the linker to fail when building the library.
|
||||
Therefore, it's highly recommended to generate only a subset of kernels as demonstrated in the sub-section below.
|
||||
```bash
|
||||
$ cmake .. -DCUTLASS_NVCC_ARCHS=90a -DCUTLASS_LIBRARY_KERNELS=all
|
||||
...
|
||||
$ make cutlass_profiler -j16
|
||||
```
|
||||
|
||||
## Building a subset of GEMM and Convolution kernels (_reduced_ build times)
|
||||
|
||||
To compile strictly one kernel or a small set of kernels, a comma-delimited list of kernel names with
|
||||
wildcard characters may be used to reduce the set of kernels. The following examples show building exactly one
|
||||
or a subset of kernels for NVIDIA Ampere and Turing architecture:
|
||||
|
||||
### Building a subset of Tensor Core GEMM kernels
|
||||
|
||||
To compile a subset of Tensor Core GEMM kernels with FP32 accumulation and FP16 input targeting NVIDIA Ampere and Turing architecture,
|
||||
use the below cmake command line:
|
||||
```bash
|
||||
$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s*gemm_f16_*_nt_align8
|
||||
...
|
||||
$ make cutlass_profiler -j16
|
||||
```
|
||||
|
||||
Example command line for profiling a subset of Tensor Core GEMM kernels is as follows:
|
||||
```bash
|
||||
./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s*gemm_f16_*_nt_align8 --m=3456 --n=4096 --k=4096
|
||||
|
||||
...
|
||||
=============================
|
||||
Problem ID: 1
|
||||
|
||||
Provider: CUTLASS
|
||||
OperationKind: gemm
|
||||
Operation: cutlass_tensorop_s1688gemm_f16_256x128_32x2_nt_align8
|
||||
|
||||
Status: Success
|
||||
Verification: ON
|
||||
Disposition: Passed
|
||||
|
||||
reference_device: Passed
|
||||
cuBLAS: Passed
|
||||
|
||||
Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=4096 --A=f16:column --B=f16:row --C=f32:column --alpha=1 \
|
||||
--beta=0 --split_k_slices=1 --batch_count=1 --op_class=tensorop --accum=f32 --cta_m=256 --cta_n=128 \
|
||||
--cta_k=32 --stages=2 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=8 --min_cc=75 \
|
||||
--max_cc=1024
|
||||
|
||||
Bytes: 118489088 bytes
|
||||
FLOPs: 115992428544 flops
|
||||
|
||||
Runtime: 1.55948 ms
|
||||
Memory: 70.7616 GiB/s
|
||||
|
||||
Math: 74378.8 GFLOP/s
|
||||
|
||||
|
||||
|
||||
=============================
|
||||
...
|
||||
```
|
||||
|
||||
### Building one CUDA Core GEMM kernel
|
||||
|
||||
To compile one SGEMM kernel targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:
|
||||
```bash
|
||||
$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sgemm_128x128_8x2_nn_align1
|
||||
...
|
||||
$ make cutlass_profiler -j16
|
||||
```
|
||||
|
||||
Example command line for profiling single SGEMM CUDA kernel is as follows:
|
||||
```bash
|
||||
$ ./tools/profiler/cutlass_profiler --kernels=sgemm --m=3456 --n=4096 --k=4096
|
||||
|
||||
=============================
|
||||
Problem ID: 1
|
||||
|
||||
Provider: CUTLASS
|
||||
OperationKind: gemm
|
||||
Operation: cutlass_simt_sgemm_128x128_8x2_nn_align1
|
||||
|
||||
Status: Success
|
||||
Verification: ON
|
||||
Disposition: Passed
|
||||
|
||||
cuBLAS: Passed
|
||||
|
||||
Arguments: --m=3456 --n=4096 --k=4096 --A=f32:column --B=f32:column --C=f32:column --alpha=1 --beta=0 --split_k_slices=1 \
|
||||
--batch_count=1 --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \
|
||||
--warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024
|
||||
|
||||
Bytes: 180355072 bytes
|
||||
FLOPs: 115992428544 flops
|
||||
|
||||
Runtime: 6.73655 ms
|
||||
Memory: 24.934 GiB/s
|
||||
|
||||
Math: 17218.4 GFLOP/s
|
||||
|
||||
=============================
|
||||
```
|
||||
|
||||
### Building a subset of Tensor Core Convolution kernels
|
||||
|
||||
To compile a subset of Tensor Core convolution kernels implementing forward propagation (fprop) with FP32 accumulation
|
||||
and FP16 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:
|
||||
```bash
|
||||
$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s*fprop_optimized_f16
|
||||
...
|
||||
$ make cutlass_profiler -j16
|
||||
```
|
||||
|
||||
Example command line for profiling a subset of Tensor Core convolution kernels is as follows:
|
||||
|
||||
```bash
|
||||
$ ./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s*fprop_optimized_f16 --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3
|
||||
|
||||
...
|
||||
=============================
|
||||
Problem ID: 1
|
||||
|
||||
Provider: CUTLASS
|
||||
OperationKind: conv2d
|
||||
Operation: cutlass_tensorop_s16816fprop_optimized_f16_128x128_32x5_nhwc
|
||||
|
||||
Status: Success
|
||||
Verification: ON
|
||||
Disposition: Passed
|
||||
|
||||
reference_device: Passed
|
||||
|
||||
Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \
|
||||
--stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f16:nhwc --Filter=f16:nhwc --Output=f32:nhwc \
|
||||
--conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \
|
||||
--eq_gemm_provider=none --op_class=tensorop --accum=f32 --cta_m=128 --cta_n=128 --cta_k=32 --stages=5 \
|
||||
--warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=16 --min_cc=80 --max_cc=1024
|
||||
|
||||
Bytes: 1130659840 bytes
|
||||
FLOPs: 118482796544 flops
|
||||
|
||||
Runtime: 0.711496 ms
|
||||
Memory: 1479.99 GiB/s
|
||||
|
||||
Math: 166526 GFLOP/s
|
||||
|
||||
=============================
|
||||
...
|
||||
```
|
||||
|
||||
|
||||
### Building one Convolution CUDA kernel
|
||||
|
||||
To compile and run one CUDA Core convolution kernel implementing forward propagation (fprop) with FP32 accumulation
|
||||
and FP32 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:
|
||||
```bash
|
||||
$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc
|
||||
...
|
||||
$ make cutlass_profiler -j16
|
||||
```
|
||||
|
||||
Example command line for profiling one CUDA Core convolution kernel:
|
||||
|
||||
```bash
|
||||
$ ./tools/profiler/cutlass_profiler --kernels=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3
|
||||
|
||||
|
||||
=============================
|
||||
Problem ID: 1
|
||||
|
||||
Provider: CUTLASS
|
||||
OperationKind: conv2d
|
||||
Operation: cutlass_simt_sfprop_optimized_128x128_8x2_nhwc
|
||||
|
||||
Status: Success
|
||||
Verification: ON
|
||||
Disposition: Passed
|
||||
|
||||
reference_device: Passed
|
||||
|
||||
Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \
|
||||
--stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f32:nhwc --Filter=f32:nhwc --Output=f32:nhwc \
|
||||
--conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \
|
||||
--eq_gemm_provider=none --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \
|
||||
--warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024
|
||||
|
||||
Bytes: 2055798784 bytes
|
||||
FLOPs: 118482796544 flops
|
||||
|
||||
Runtime: 7.34266 ms
|
||||
Memory: 260.752 GiB/s
|
||||
|
||||
Math: 16136.2 GFLOP/s
|
||||
|
||||
|
||||
=============================
|
||||
|
||||
```
|
||||
|
||||
## More Details on Compiling CUTLASS Kernels and CUTLASS Profiler
|
||||
- Please follow the links for more CMake examples on selectively compiling CUTLASS kernels:
|
||||
- [GEMM CMake Examples](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html#gemm-cmake-examples)
|
||||
- [Implicit GEMM convolution CMake Examples](https://docs.nvidia.com/cutlass/media/docs/cpp/quickstart.html#convolution-cmake-examples)
|
||||
- [Further details about the CUTLASS Profiler are described here.](https://docs.nvidia.com/cutlass/media/docs/cpp/profiler.html)
|
||||
|
||||
|
||||
# About
|
||||
|
||||
CUTLASS is released by NVIDIA Corporation as Open Source software under the
|
||||
[3-clause "New" BSD license](LICENSE.txt).
|
||||
|
||||
# Contributors
|
||||
|
||||
The official list of CUTLASS developers and contributors is available here: [CONTRIBUTORS](CONTRIBUTORS.md).
|
||||
|
||||
# Copyright
|
||||
|
||||
Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
```
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
```
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# Copyright (c) 2019 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# FILE_TO_C_STRING(FILENAME VARIABLE_NAME OUTPUT_STRING ZERO_TERMINATED)
#
# A small utility function which generates a C-header fragment from an input file.
# The file is read as raw bytes and rendered as the definition
#   static char const <VARIABLE_NAME>[] = { char(0x..),char(0x..), ... };
# with two bytes per generated line.
#
# FILENAME        - path of the file whose bytes are embedded.
# VARIABLE_NAME   - identifier used for the generated array.
# OUTPUT_STRING   - name of the variable in the caller's scope that receives the generated text.
# ZERO_TERMINATED - expanded and then evaluated by if(); when true, one 0x00 byte is appended.
#
# Stops with FATAL_ERROR when there is nothing to emit (empty file and no terminator requested).
|
||||
function(FILE_TO_C_STRING FILENAME VARIABLE_NAME OUTPUT_STRING ZERO_TERMINATED)
|
||||
# HEX makes file(READ) return the content as a hexadecimal string, two digits per byte.
FILE(READ "${FILENAME}" HEX_INPUT HEX)
|
||||
# The argument is expanded before if() evaluates it, so a caller may pass a boolean constant
# or a variable name. Passing the literal ZERO_TERMINATED names this function's own
# (non-false) parameter and therefore evaluates to true.
if (${ZERO_TERMINATED})
|
||||
string(APPEND HEX_INPUT "00")
|
||||
endif()
|
||||
|
||||
# Reject empty input explicitly: an empty initializer list is not a usable array definition,
# and an unquoted empty expansion would otherwise surface as an argument-count error below.
string(LENGTH "${HEX_INPUT}" HEX_INPUT_LENGTH)
if (HEX_INPUT_LENGTH EQUAL 0)
message(FATAL_ERROR "FILE_TO_C_STRING: '${FILENAME}' is empty; nothing to emit for ${VARIABLE_NAME}")
endif()
# Break the hex stream after every four digits (two bytes) so the generated lines stay short.
string(REGEX REPLACE "(....)" "\\1\n" HEX_OUTPUT "${HEX_INPUT}")
|
||||
# Turn each pair of lowercase hex digits into a char(0xNN), initializer element.
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "char(0x\\1)," HEX_OUTPUT "${HEX_OUTPUT}")
|
||||
|
||||
# Wrap the element list in the array definition.
set(HEX_OUTPUT "static char const ${VARIABLE_NAME}[] = {\n ${HEX_OUTPUT}\n};\n")
|
||||
|
||||
# Hand the result back through the caller-named variable.
set(${OUTPUT_STRING} "${HEX_OUTPUT}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
# message("Create header file for ${FILE_IN}")
|
||||
# message("Create header file for ${FILE_OUT}")
|
||||
# Script body: convert FILE_IN into a char array named VARIABLE_NAME. The generated text is
# returned in OUTPUT_STRING; the literal ZERO_TERMINATED is passed as the fourth argument.
# NOTE(review): FILE_IN, VARIABLE_NAME and FILE_OUT are not set anywhere in the visible part of
# this file; presumably the invoker defines them (e.g. with -D) — confirm against the caller.
# NOTE(review): ${FILE_IN} and ${VARIABLE_NAME} are unquoted, so an empty value or one
# containing ';' would shift the remaining arguments.
file_to_c_string(${FILE_IN} ${VARIABLE_NAME} OUTPUT_STRING ZERO_TERMINATED)
|
||||
|
||||
# Assemble the header text: "#pragma once", then the array wrapped in namespace cutlass::nvrtc.
set(RESULT "#pragma once\n")
|
||||
string(APPEND RESULT "namespace cutlass {\n")
|
||||
string(APPEND RESULT "namespace nvrtc {\n")
|
||||
string(APPEND RESULT "${OUTPUT_STRING}")
|
||||
string(APPEND RESULT "} // namespace nvrtc\n")
|
||||
string(APPEND RESULT "} // namespace cutlass\n")
|
||||
# Write the assembled text to FILE_OUT, replacing any existing content.
file(WRITE "${FILE_OUT}" "${RESULT}")
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Generated file
|
||||
|
||||
# List of test sets this test executable belongs to.
# NOTE(review): the @NAME@ tokens in this file look like configure-time placeholders
# (presumably substituted by configure_file()) — confirm against the generating CMakeLists.
set(TEST_SETS_SUPPORTED @TEST_SETS_SUPPORTED@)
|
||||
|
||||
# When the CUTLASS_TEST_SETS environment variable is not defined, fall back to the default
# value baked into this file.
if (NOT DEFINED ENV{CUTLASS_TEST_SETS})
|
||||
set(ENV{CUTLASS_TEST_SETS} @CUTLASS_DEFAULT_ACTIVE_TEST_SETS@)
|
||||
endif()
|
||||
|
||||
# Every requested test set must be in the supported list. On the first one that is not,
# log a status message and stop processing this file, so nothing below is evaluated.
foreach(TEST_SET_REQUESTED IN ITEMS $ENV{CUTLASS_TEST_SETS})
|
||||
if (NOT TEST_SET_REQUESTED IN_LIST TEST_SETS_SUPPORTED)
|
||||
message(STATUS "Skipping tests for @TEST_EXE_PATH@ as ${TEST_SET_REQUESTED} is not in the set of [${TEST_SETS_SUPPORTED}].")
|
||||
return()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Settings for this test executable: its path, its working directory, and whether the
# extended add_test format is used.
set(TEST_EXE_PATH @TEST_EXE_PATH@)
|
||||
set(TEST_EXE_WORKING_DIRECTORY @TEST_EXE_WORKING_DIRECTORY@)
|
||||
set(CUTLASS_USE_EXTENDED_ADD_TEST_FORMAT @TEST_USE_EXTENDED_FORMAT@)
|
||||
|
||||
# The CUTLASS_TEST_EXECUTION_ENVIRONMENT environment variable, when defined, overrides the
# value baked into this file.
if (DEFINED ENV{CUTLASS_TEST_EXECUTION_ENVIRONMENT})
|
||||
set(_CUTLASS_TEST_EXECUTION_ENVIRONMENT $ENV{CUTLASS_TEST_EXECUTION_ENVIRONMENT})
|
||||
else()
|
||||
set(_CUTLASS_TEST_EXECUTION_ENVIRONMENT @CUTLASS_TEST_EXECUTION_ENVIRONMENT@)
|
||||
endif()
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
if (NOT CUTLASS_USE_EXTENDED_ADD_TEST_FORMAT)
  # Short add_test(<name> <command> ...) signature.
  add_test(@TESTCASE_NAME@ ${_CUTLASS_TEST_EXECUTION_ENVIRONMENT} "${TEST_EXE_PATH}" @TEST_COMMAND_OPTIONS@)
else()
  # The longform/extended NAME/COMMAND signature allows generator expressions
  # to be expanded properly and is useful when this file has to be included
  # immediately into CMake code that is still being processed.
  add_test(NAME @TESTCASE_NAME@ COMMAND ${_CUTLASS_TEST_EXECUTION_ENVIRONMENT} "${TEST_EXE_PATH}" @TEST_COMMAND_OPTIONS@)
endif()

# Set a working directory for the test only when one was configured.
if (TEST_EXE_WORKING_DIRECTORY)
  set_tests_properties(@TESTCASE_NAME@ PROPERTIES WORKING_DIRECTORY "${TEST_EXE_WORKING_DIRECTORY}")
endif()

# The DISABLED property is always set, to whatever value was configured.
set_tests_properties(@TESTCASE_NAME@ PROPERTIES DISABLED @__DISABLE_TESTS@)
|
||||
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# Directory that holds this package configuration file.
get_filename_component(NvidiaCutlass_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY)

include(CMakeFindDependencyMacro)

# Load the exported targets only when nvidia::cutlass::CUTLASS does not exist yet.
if(NOT TARGET nvidia::cutlass::CUTLASS)
  include("${NvidiaCutlass_CMAKE_DIR}/NvidiaCutlassTargets.cmake")
endif()
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Pass variable values to CPack verbatim.
set(CPACK_VERBATIM_VARIABLES YES)

# Package identity and descriptive metadata.
set(CPACK_PACKAGE_NAME NvidiaCutlass)
set(CPACK_PACKAGE_VENDOR NVIDIA)
set(CPACK_PACKAGE_CONTACT info@nvidia.com)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CUTLASS CUDA C++ Template Linear Algebra Library")

# The install directory is named after the package.
set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CPACK_PACKAGE_NAME})

# The package version is taken from the project version components.
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})

# Resource files that are currently not configured:
# set(CPACK_PACKAGE_DESCRIPTION_FILE ${CMAKE_CURRENT_LIST_DIR}/Description.txt)
# set(CPACK_RESOURCE_FILE_WELCOME ${CMAKE_CURRENT_LIST_DIR}/Welcome.txt)
# set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_LIST_DIR}/License.txt)
# set(CPACK_RESOURCE_FILE_README ${CMAKE_CURRENT_LIST_DIR}/Readme.txt)

include(CPack)
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(FetchContent)

# Repository GoogleTest is fetched from; overridable through the cache.
set(GTEST_REPOSITORY "https://github.com/google/googletest.git" CACHE STRING "GoogleTest repo to fetch")

# Optional local GoogleTest checkout. When given, it is stored as the
# FetchContent source-directory override for googletest.
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")
if(GOOGLETEST_DIR)
  set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()

FetchContent_Declare(
  googletest
  GIT_REPOSITORY ${GTEST_REPOSITORY}
  GIT_TAG        v1.14.0
  )

# Populate and add GoogleTest only if that has not happened already.
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
  FetchContent_Populate(googletest)

  # On MSVC, force the gtest_force_shared_crt cache option ON before adding GoogleTest.
  if (MSVC)
    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
  endif()

  # EXCLUDE_FROM_ALL keeps the GoogleTest targets out of the default build target.
  add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/***************************************************************************************************
|
||||
* Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
/*! \file
|
||||
\brief Basic CUDA file for testing compiler flags.
|
||||
*/
|
||||
|
||||
/// Device function that always returns -1.
__device__ int inner() {
  return -1;
}
|
||||
|
||||
/// Kernel that calls inner() and discards its result.
__global__ void test() {
  (void)inner();
}
|
||||
|
||||
/// Launches the test kernel with a <<<1,1>>> configuration and returns 0.
int main() {
  test<<<1, 1>>>();
  return 0;
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/***************************************************************************************************
|
||||
* Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#define CUTLASS_BUILD @CUTLASS_VERSION_BUILD@
|
||||
#define CUTLASS_REVISION "@CUTLASS_REVISION@"
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
message(STATUS "Configuring cublas ...")

if((DEFINED CUTLASS_ENABLE_CUBLAS AND NOT CUTLASS_ENABLE_CUBLAS) OR
   (DEFINED CUBLAS_ENABLED AND NOT CUBLAS_ENABLED))

  # Either switch being defined and false disables cuBLAS; treat it as not found.
  set(CUBLAS_FOUND OFF)
  message(STATUS "cuBLAS Disabled.")

elseif(NOT TARGET cublas)

  # Look for the cuBLAS header.
  find_path(
    _CUBLAS_INCLUDE_DIR
    NAMES cublas_v2.h
    HINTS
      ${CUBLAS_INCLUDE_PATH}
      ENV CUBLAS_INCLUDE_PATH
      ${CUBLAS_PATH}
      ENV CUBLAS_PATH
      ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES
      include
    )

  # Look for the cuBLAS library; the parent of the header directory is one of the hints.
  find_library(
    _CUBLAS_LIBRARY
    NAMES cublas
    HINTS
      ${CUBLAS_LIBRARY_PATH}
      ENV CUBLAS_LIBRARY_PATH
      ${_CUBLAS_INCLUDE_DIR}/..
      ${CUBLAS_PATH}
      ENV CUBLAS_PATH
      ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES
      lib64
      lib/x64
      lib
    )

  # cuBLAS counts as found only when both the header and the library were located.
  if(NOT (_CUBLAS_INCLUDE_DIR AND _CUBLAS_LIBRARY))

    message(STATUS "cublas not found.")
    set(CUBLAS_FOUND OFF CACHE INTERNAL "cublas Library Found")

  else()

    message(STATUS "cuBLAS: ${_CUBLAS_LIBRARY}")
    message(STATUS "cuBLAS: ${_CUBLAS_INCLUDE_DIR}")

    set(CUBLAS_FOUND ON CACHE INTERNAL "cublas Library Found")
    set(CUBLAS_LIBRARY ${_CUBLAS_LIBRARY})
    set(CUBLAS_INCLUDE_DIR ${_CUBLAS_INCLUDE_DIR})

  endif()

endif()

# The option defaults to the discovery result; an existing cache value is kept.
set(CUTLASS_ENABLE_CUBLAS ${CUBLAS_FOUND} CACHE BOOL "Enable CUTLASS to build with cuBLAS library.")

if(CUTLASS_ENABLE_CUBLAS AND NOT CUBLAS_FOUND)
  message(FATAL_ERROR "CUTLASS_ENABLE_CUBLAS enabled but cuBLAS library could not be found.")
endif()

if(CUTLASS_ENABLE_CUBLAS AND NOT TARGET cublas)

  # Imported target for cuBLAS: SHARED everywhere except WIN32, where it is STATIC.
  if(NOT WIN32)
    add_library(cublas SHARED IMPORTED GLOBAL)
  else()
    add_library(cublas STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cublas ALIAS cublas)

  set_property(
    TARGET cublas
    PROPERTY IMPORTED_LOCATION
    ${CUBLAS_LIBRARY})

  target_include_directories(
    cublas
    INTERFACE
      $<INSTALL_INTERFACE:include>
      $<BUILD_INTERFACE:${CUBLAS_INCLUDE_DIR}>)

  # Look for cublasLt using the same hints as for cuBLAS itself.
  find_library(
    _CUBLASLT_LIBRARY
    NAMES cublasLt
    HINTS
      ${CUBLAS_LIBRARY_PATH}
      ENV CUBLAS_LIBRARY_PATH
      ${_CUBLAS_INCLUDE_DIR}/..
      ${CUBLAS_PATH}
      ENV CUBLAS_PATH
      ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES
      lib64
      lib/x64
      lib
    )

  # When cublasLt is available, import it too and link it through the cublas target.
  if(_CUBLASLT_LIBRARY AND NOT TARGET cublasLt)

    if(NOT WIN32)
      add_library(cublasLt SHARED IMPORTED GLOBAL)
    else()
      add_library(cublasLt STATIC IMPORTED GLOBAL)
    endif()

    set_property(
      TARGET cublasLt
      PROPERTY IMPORTED_LOCATION
      ${_CUBLASLT_LIBRARY})

    add_library(nvidia::cublasLt ALIAS cublasLt)

    target_link_libraries(cublas INTERFACE cublasLt)

  endif()

endif()

message(STATUS "Configuring cuBLAS ... done.")
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# CUDNN_ENABLED, when defined, provides the initial value of the
# CUTLASS_ENABLE_CUDNN cache option.
if(DEFINED CUDNN_ENABLED)
  set(CUTLASS_ENABLE_CUDNN ${CUDNN_ENABLED} CACHE BOOL "Enable CUTLASS to build with cuDNN library.")
endif()

# cuDNN explicitly switched off: nothing else in this file applies.
if(DEFINED CUTLASS_ENABLE_CUDNN AND NOT CUTLASS_ENABLE_CUDNN)
  return()
endif()

message(STATUS "Configuring cuDNN ...")

# Look for the cuDNN header.
find_path(
  _CUDNN_INCLUDE_DIR cudnn.h
  PATHS
    ${CUDA_TOOLKIT_ROOT_DIR}/include
    $ENV{CUDNN_PATH}/include
    $ENV{CUDA_PATH}/include
    ${CUDNN_PATH}/include
    /usr/include)

# Look for the cuDNN library.
find_library(
  _CUDNN_LIBRARY cudnn
  HINTS
    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
    ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
    ${CUDA_TOOLKIT_ROOT_DIR}/lib
    $ENV{CUDNN_PATH}/lib64
    $ENV{CUDNN_PATH}/lib/x64
    $ENV{CUDNN_PATH}/lib
    $ENV{CUDA_PATH}/lib64
    $ENV{CUDA_PATH}/lib/x64
    $ENV{CUDA_PATH}/lib
    ${CUDNN_PATH}/lib64
    ${CUDNN_PATH}/lib/x64
    ${CUDNN_PATH}/lib
    /usr/lib/x86_64-linux-gnu
    /usr/lib)

# cuDNN counts as found only when both the header and the library were located.
if(_CUDNN_INCLUDE_DIR AND _CUDNN_LIBRARY)

  message(STATUS "cuDNN: ${_CUDNN_LIBRARY}")
  message(STATUS "cuDNN: ${_CUDNN_INCLUDE_DIR}")

  set(CUDNN_FOUND ON CACHE INTERNAL "cuDNN Library Found")

else()

  message(STATUS "cuDNN not found.")
  set(CUDNN_FOUND OFF CACHE INTERNAL "cuDNN Library Found")

endif()

# The option defaults to the discovery result; an existing cache value is kept.
set(CUTLASS_ENABLE_CUDNN ${CUDNN_FOUND} CACHE BOOL "Enable CUTLASS to build with cuDNN library.")

# Reject "enabled but not found" before any target is created, so that no
# imported cudnn target is ever defined with a NOTFOUND library location.
if(CUTLASS_ENABLE_CUDNN AND NOT CUDNN_FOUND)
  message(FATAL_ERROR "CUTLASS_ENABLE_CUDNN enabled but cuDNN library could not be found.")
endif()

if (CUTLASS_ENABLE_CUDNN AND NOT TARGET cudnn)

  set(CUDNN_INCLUDE_DIR ${_CUDNN_INCLUDE_DIR})
  set(CUDNN_LIBRARY ${_CUDNN_LIBRARY})

  # Imported target for cuDNN: STATIC on WIN32, SHARED elsewhere.
  if(WIN32)
    add_library(cudnn STATIC IMPORTED GLOBAL)
  else()
    add_library(cudnn SHARED IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cudnn ALIAS cudnn)

  set_property(
    TARGET cudnn
    PROPERTY IMPORTED_LOCATION
    ${CUDNN_LIBRARY})

  target_include_directories(
    cudnn
    INTERFACE
      $<INSTALL_INTERFACE:include>
      $<BUILD_INTERFACE:${CUDNN_INCLUDE_DIR}>)

endif()

message(STATUS "Configuring cuDNN ... done.")
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
# Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# The Python 3 interpreter (3.5 or newer) is mandatory.
find_package(Python3 3.5 COMPONENTS Interpreter REQUIRED)

# Options for profiler-based functional testing.
set(CUTLASS_BUILD_FOR_PROFILER_REGRESSIONS
    OFF
    CACHE BOOL "Utilize profiler-based functional regressions")

# The regression test level defaults to CUTLASS_TEST_LEVEL.
set(CUTLASS_PROFILER_REGRESSION_TEST_LEVEL
    ${CUTLASS_TEST_LEVEL}
    CACHE STRING "Profiler functional regression test level")
|
||||
|
||||
# Runs the cutlass_library generator.py script to produce the kernel filter
# and testlist files for one test set.
#
# Arguments:
#   TEST_SET_NAME <name>  Value passed to the generator as --generator-target.
#
# The generator runs in CMAKE_CURRENT_BINARY_DIR with PYTHONPATH set to
# CUTLASS_LIBRARY_PACKAGE_DIR. Its stdout and stderr both go to
# library_filter_generation.log in that directory; a non-zero exit status
# stops configuration with a FATAL_ERROR that points at the log.
function(cutlass_generate_kernel_filter_and_testlist_files)

  set(options)
  set(oneValueArgs TEST_SET_NAME)
  set(multiValueArgs)
  # The "_" prefix makes the parsed argument available as __TEST_SET_NAME.
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Only OUTPUT_FILE is given for stdout. Naming OUTPUT_VARIABLE as well leaves
  # it unspecified which of the two receives the output, and the captured
  # variable was never read, so the log file is the single destination.
  # NOTE(review): CUTLASS_NVCC_ARCHS is expanded unquoted, so a multi-entry
  # list would be split into separate arguments — confirm that is intended.
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CUTLASS_LIBRARY_PACKAGE_DIR}
      ${Python3_EXECUTABLE} ${CUTLASS_SOURCE_DIR}/python/cutlass_library/generator.py
      --generator-target=${__TEST_SET_NAME}
      --cuda-version=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}
      --architectures=${CUTLASS_NVCC_ARCHS}
      --kernels=\*
      --disable-cutlass-package-imports
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    RESULT_VARIABLE cutlass_FILTER_GENERATION_RESULT
    OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR}/library_filter_generation.log
    ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR}/library_filter_generation.log
  )

  if(NOT cutlass_FILTER_GENERATION_RESULT EQUAL 0)
    message(FATAL_ERROR "Error generating kernel filters and testlist files. See ${CMAKE_CURRENT_BINARY_DIR}/library_filter_generation.log")
  endif()
endfunction()
|
||||
|
||||
if(CUTLASS_BUILD_FOR_PROFILER_REGRESSIONS)

  # Architectures accepted for profiler-based tests. Two further entries
  # depend on whether the CUDA version is below 13.0.
  set(PROFILER_ARCH_LIST 100a 100f 103a 120a 120f 121a)
  if (NOT CUDA_VERSION VERSION_LESS 13.0)
    list(APPEND PROFILER_ARCH_LIST 110a 110f)
  else()
    list(APPEND PROFILER_ARCH_LIST 101a 101f)
  endif()

  # Every requested architecture has to be in the accepted list.
  foreach(ARCH IN LISTS CUTLASS_NVCC_ARCHS)
    if(NOT (ARCH IN_LIST PROFILER_ARCH_LIST))
      message(FATAL_ERROR "Only SM${PROFILER_ARCH_LIST} compute capabilities are supported with profiler-based unit tests")
    endif()
  endforeach()

  # Levels 0 and 1 each generate their files and record the resulting paths
  # in the cache; any other level does nothing here.
  if(CUTLASS_PROFILER_REGRESSION_TEST_LEVEL EQUAL 1)

    message(STATUS "Building for L1 profiler-based functional regressions")
    cutlass_generate_kernel_filter_and_testlist_files(TEST_SET_NAME kernel_testlist_l1)
    set(CUTLASS_PROFILER_REGRESSION_LIST_FILE ${CMAKE_CURRENT_BINARY_DIR}/FK_functional_L1_testlist_SM${CUTLASS_NVCC_ARCHS}_cutlass3x_gemm.csv CACHE STRING "Regression set")
    set(KERNEL_FILTER_FILE ${CMAKE_CURRENT_BINARY_DIR}/FK_functional_L1_testlist_SM${CUTLASS_NVCC_ARCHS}_cutlass3x_gemm_kernel_filter.list CACHE STRING "Kernel set")

  elseif(CUTLASS_PROFILER_REGRESSION_TEST_LEVEL EQUAL 0)

    message(STATUS "Building for L0 profiler-based functional regressions")
    cutlass_generate_kernel_filter_and_testlist_files(TEST_SET_NAME kernel_testlist_l0)
    set(CUTLASS_PROFILER_REGRESSION_LIST_FILE ${CMAKE_CURRENT_BINARY_DIR}/FK_functional_L0_testlist_SM${CUTLASS_NVCC_ARCHS}_cutlass3x_gemm.csv CACHE STRING "Regression set")
    set(KERNEL_FILTER_FILE ${CMAKE_CURRENT_BINARY_DIR}/FK_functional_L0_testlist_SM${CUTLASS_NVCC_ARCHS}_cutlass3x_gemm_kernel_filter.list CACHE STRING "Kernel set")

  endif()
endif()
|
||||
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
theme: jekyll-theme-minimal
|
||||
|
|
@ -0,0 +1,145 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: aligned_buffer.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">aligned_buffer.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>AlignedBuffer is a container for trivially copyable elements suitable for use in unions and shared memory.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="cutlass_8h_source.html">cutlass/cutlass.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="array_8h_source.html">cutlass/array.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for aligned_buffer.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="aligned__buffer_8h__incl.png" border="0" usemap="#aligned__buffer_8h" alt=""/></div>
|
||||
<map name="aligned__buffer_8h" id="aligned__buffer_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="aligned__buffer_8h__dep__incl.png" border="0" usemap="#aligned__buffer_8hdep" alt=""/></div>
|
||||
<map name="aligned__buffer_8hdep" id="aligned__buffer_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="aligned__buffer_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1AlignedBuffer.html">cutlass::AlignedBuffer< T, N, Align ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Modifies semantics of cutlass::Array<> to provide guaranteed alignment. <a href="structcutlass_1_1AlignedBuffer.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
6cbc6b81ede44b5f08afd4f4519d56d1
|
||||
|
|
@ -0,0 +1 @@
|
|||
b26c62930ff7668b89f2ee6624e0be3a
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,867 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Class List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li class="current"><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">Class List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
<div class="textblock">Here are the classes, structs, unions and interfaces with brief descriptions:</div><div class="directory">
|
||||
<div class="levels">[detail level <span onclick="javascript:toggleLevel(1);">1</span><span onclick="javascript:toggleLevel(2);">2</span><span onclick="javascript:toggleLevel(3);">3</span><span onclick="javascript:toggleLevel(4);">4</span><span onclick="javascript:toggleLevel(5);">5</span><span onclick="javascript:toggleLevel(6);">6</span>]</div><table class="directory">
|
||||
<tr id="row_0_" class="even"><td class="entry"><span style="width:0px;display:inline-block;"> </span><span id="arr_0_" class="arrow" onclick="toggleFolder('0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass.html" target="_self">cutlass</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_0_" class="arrow" onclick="toggleFolder('0_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1arch.html" target="_self">arch</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma.html" target="_self">Mma</a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_30fa42e1ad201df010637cd22fc070a1.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< double >, LayoutA, complex< double >, LayoutB, complex< double >, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_48b3a43bc03fff93a111ac01abe7e40d.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< double >, LayoutA, double, LayoutB, complex< double >, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_76f9d24016e1b4167b16f4d7628c9546.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< float >, LayoutA, complex< float >, LayoutB, complex< float >, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_f1c9d2ee842455cd0c5b71d56108d468.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< float >, LayoutA, float, LayoutB, complex< float >, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01double_070b94670e040ed5855e5b42d5ca8a443.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, double, LayoutA, complex< double >, LayoutB, complex< double >, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01double_0aa57e6a2e6b5da37d10688bf99419a23.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, double, LayoutA, double, LayoutB, double, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01ElementAb6e65b2cf5ede7f41cb070a767158dee.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator ></a></td><td class="desc">Matrix multiply-add operation - specialized for 1x1x1x1 matrix multiply operation </td></tr>
|
||||
<tr id="row_0_0_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01float_00e3e12e263df6506b8cf06c3f4d478b8e.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, float, LayoutA, complex< float >, LayoutB, complex< float >, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01float_004bb3fd76ca2af7b3210676fa9644d95b.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, float, LayoutA, float, LayoutB, float, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01half__t_4f30ee91f7bb3844ff7579c68d078818.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, half_t, LayoutA, half_t, LayoutB, float, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01int_00_00b2dff9ce8caad9aff5bc6a355539161.html" target="_self">Mma< gemm::GemmShape< 1, 1, 1 >, 1, int, LayoutA, int, LayoutB, int, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_012_01_4_00_011_00_01int16__t8c4bac365710598317a69c489f7239db.html" target="_self">Mma< gemm::GemmShape< 1, 1, 2 >, 1, int16_t, layout::RowMajor, int16_t, layout::ColumnMajor, int, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_014_01_4_00_011_00_01int8__t_a1ef6624fc8c10126f17f4ee88283d72.html" target="_self">Mma< gemm::GemmShape< 1, 1, 4 >, 1, int8_t, LayoutA, int8_t, LayoutB, int, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_012_00_011_01_4_00_011_00_01half__t_f3dc2e59f857ada163d1e0781ea8f391.html" target="_self">Mma< gemm::GemmShape< 1, 2, 1 >, 1, half_t, LayoutA, half_t, LayoutB, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_0116_00_0116_00_014_01_4_00_0132_00_01half_0bcc4d05f9811035f08cc1b7f0154a4d.html" target="_self">Mma< gemm::GemmShape< 16, 16, 4 >, 32, half_t, LayoutA, half_t, LayoutB, ElementC, LayoutC, Operator ></a></td><td class="desc">Matrix multiply-add operation specialized for the entire warp </td></tr>
|
||||
<tr id="row_0_0_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_0116_00_018_00_018_01_4_00_0132_00_01half__02a3f19a78995f97d793a668e0e4d4f0.html" target="_self">Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F32 = F16 * F16 + F32 </td></tr>
|
||||
<tr id="row_0_0_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_0116_00_018_00_018_01_4_00_0132_00_01half__96363097c47b056f0ca1911afd7f8b7a.html" target="_self">Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation - F16 = F16 * F16 + F16 </td></tr>
|
||||
<tr id="row_0_0_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_011_00_011_01_4_00_011_00_01half__t_8cf78649807b93684f3d431bfa34ee28.html" target="_self">Mma< gemm::GemmShape< 2, 1, 1 >, 1, half_t, LayoutA, half_t, LayoutB, half_t, LayoutC, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_012_00_011_01_4_00_011_00_01half__t_ccde11d1bbbdab3702772ce44eb9729a.html" target="_self">Mma< gemm::GemmShape< 2, 2, 1 >, 1, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::ColumnMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_012_00_011_01_4_00_011_00_01half__t_c07cc6439298fa5486a719e577be2538.html" target="_self">Mma< gemm::GemmShape< 2, 2, 1 >, 1, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_21_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_01128_01_4_00_0132_00_01uint15918972b95027764b3a849b03075ed2b.html" target="_self">Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc ></a></td><td class="desc">Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01int8__927179f46017ea5f58f859f1196c4829.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = S8 * S8 + S32 </td></tr>
|
||||
<tr id="row_0_0_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01int8__8ebae0cbdf333fddfe5c24d35ebe8e02.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = S8 * S8 + S32 </td></tr>
|
||||
<tr id="row_0_0_24_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01int8__5299c9c90c8f2f521be0c8cec1c3eb08.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = S8 * U8 + S32 </td></tr>
|
||||
<tr id="row_0_0_25_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01int8__f083347e265b1e9eea5572d86ddb6bf9.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = S8 * U8 + S32 </td></tr>
|
||||
<tr id="row_0_0_26_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01uint8_a62aa63a212985df306fb27e8a50aeae.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = U8 * S8 + S32 </td></tr>
|
||||
<tr id="row_0_0_27_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01uint8_ab741d81fdc991345cb9e43c29fca573.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = U8 * S8 + S32 </td></tr>
|
||||
<tr id="row_0_0_28_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01uint8_5221708cec5828d35db1d1c47cb4964e.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = S8 * U8 + S32 </td></tr>
|
||||
<tr id="row_0_0_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0116_01_4_00_0132_00_01uint8_bef0c048bc0f8ba2d875cb7ab26d363b.html" target="_self">Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = S8 * U8 + S32 </td></tr>
|
||||
<tr id="row_0_0_30_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01int4b_6e513ccbc44ae7909a60d93b9b5435b3.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = S4 * S4 + S32 </td></tr>
|
||||
<tr id="row_0_0_31_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01int4b_0ee08a4520882d24ba9026879265e892.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = S4 * S4 + S32 </td></tr>
|
||||
<tr id="row_0_0_32_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01int4b_4746fc55e614df0016c518d3fda2677e.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = S4 * U4 + S32 </td></tr>
|
||||
<tr id="row_0_0_33_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01int4b_546e9ec6de6a5970b326da6f6280f1d4.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = S4 * U4 + S32 </td></tr>
|
||||
<tr id="row_0_0_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01uint4b03e3b50dbcb30d0d1ac062f3a9d5abef.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = U4 * S4 + S32 </td></tr>
|
||||
<tr id="row_0_0_35_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01uint4b6d968039dde5c9f062ab15f90a8049fe.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = U4 * S4 + S32 </td></tr>
|
||||
<tr id="row_0_0_36_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01uint4bc4b6ba004e25c44bfd9266c61f937dfb.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: S32 = U4 * U4 + S32 </td></tr>
|
||||
<tr id="row_0_0_37_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_0132_01_4_00_0132_00_01uint4b451d5cf5d7e8cbbe476afe3dab5c09b2.html" target="_self">Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate ></a></td><td class="desc">Matrix multiply-add operation: S32 = U4 * U4 + S32 </td></tr>
|
||||
<tr id="row_0_0_38_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_b0242d7a01097510effbc4718040d3e5.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F32 = F16 * F16 + F32 </td></tr>
|
||||
<tr id="row_0_0_39_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_c7f88bfd32a544fba8111d2dcadeab11.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F16 = F16 * F16 + F16 </td></tr>
|
||||
<tr id="row_0_0_40_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_44a3b2a8df88a2b067f1284515cb5371.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::RowMajor, float, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F32 = F16 * F16 + F32 </td></tr>
|
||||
<tr id="row_0_0_41_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_4b7308177b308a272c1889fbe9670275.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F16 = F16 * F16 + F16 </td></tr>
|
||||
<tr id="row_0_0_42_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_5a9888862cebd333ecaf11f7262f77d4.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F32 = F16 * F16 + F32 </td></tr>
|
||||
<tr id="row_0_0_43_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_31defda8ea2b7d855642ffd77da1a411.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F16 = F16 * F16 + F16 </td></tr>
|
||||
<tr id="row_0_0_44_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_839a7c8bb938d1661f4611e68f85d8cb.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::RowMajor, float, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F32 = F16 * F16 + F32 </td></tr>
|
||||
<tr id="row_0_0_45_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_018_00_018_00_014_01_4_00_018_00_01half__t_73d9802d6b944a5299bc255887db6bbc.html" target="_self">Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td><td class="desc">Matrix multiply-add operation: F16 = F16 * F16 + F16 </td></tr>
|
||||
<tr id="row_0_0_46_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1PtxWmma.html" target="_self">PtxWmma</a></td><td class="desc">WMMA Matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_0_47_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1PtxWmmaLoadA.html" target="_self">PtxWmmaLoadA</a></td><td class="desc">WMMA PTX string load for A, B, and C matrices </td></tr>
|
||||
<tr id="row_0_0_48_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1PtxWmmaLoadB.html" target="_self">PtxWmmaLoadB</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_49_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1PtxWmmaLoadC.html" target="_self">PtxWmmaLoadC</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_50_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1PtxWmmaStoreD.html" target="_self">PtxWmmaStoreD</a></td><td class="desc">WMMA store for matrix D </td></tr>
|
||||
<tr id="row_0_0_51_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Sm50.html" target="_self">Sm50</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_52_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Sm60.html" target="_self">Sm60</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_53_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Sm61.html" target="_self">Sm61</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_54_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Sm70.html" target="_self">Sm70</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_55_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Sm72.html" target="_self">Sm72</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_56_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Sm75.html" target="_self">Sm75</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_57_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1half__t_00_01LayoutA___00_01cutlass_1_84e30c8cc93eeb7ca02f651bd16d4c38.html" target="_self">Wmma< Shape_, cutlass::half_t, LayoutA_, cutlass::half_t, LayoutB_, ElementC_, LayoutC_, cutlass::arch::OpMultiplyAdd ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_58_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1int4b__t_00_01LayoutA___00_01cutlass_16fd808a90b3cf9d7cfc99f30888ca3fe.html" target="_self">Wmma< Shape_, cutlass::int4b_t, LayoutA_, cutlass::int4b_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_59_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1uint1b__t_00_01LayoutA___00_01cutlass_c80a7ea4d219cd9b13b560b493338028.html" target="_self">Wmma< Shape_, cutlass::uint1b_t, LayoutA_, cutlass::uint1b_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpXorPopc ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_60_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01int8__t_00_01LayoutA___00_01int8__t_00_01LayoutB_505c57bb6818a941dc16f00cf35a9ec0.html" target="_self">Wmma< Shape_, int8_t, LayoutA_, int8_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_0_61_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01uint8__t_00_01LayoutA___00_01uint8__t_00_01Layout219a464a1248ebfc37aa29bcb10cb1b0.html" target="_self">Wmma< Shape_, uint8_t, LayoutA_, uint8_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_1_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_1_" class="arrow" onclick="toggleFolder('0_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1device__memory.html" target="_self">device_memory</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_1_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_1_0_" class="arrow" onclick="toggleFolder('0_1_0_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1device__memory_1_1allocation.html" target="_self">allocation</a></td><td class="desc">Device allocation abstraction that tracks size and capacity </td></tr>
|
||||
<tr id="row_0_1_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1device__memory_1_1allocation_1_1deleter.html" target="_self">deleter</a></td><td class="desc">Delete functor for CUDA device memory </td></tr>
|
||||
<tr id="row_0_2_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_2_" class="arrow" onclick="toggleFolder('0_2_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1epilogue.html" target="_self">epilogue</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_2_0_" class="arrow" onclick="toggleFolder('0_2_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1epilogue_1_1thread.html" target="_self">thread</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_0_0_" class="arrow" onclick="toggleFolder('0_2_0_0_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1thread_1_1Convert.html" target="_self">Convert</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1thread_1_1Convert_1_1Params.html" target="_self">Params</a></td><td class="desc">Host-constructable parameters structure </td></tr>
|
||||
<tr id="row_0_2_0_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_0_1_" class="arrow" onclick="toggleFolder('0_2_0_1_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1thread_1_1LinearCombination.html" target="_self">LinearCombination</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_1_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1thread_1_1LinearCombination_1_1Params.html" target="_self">Params</a></td><td class="desc">Host-constructable parameters structure </td></tr>
|
||||
<tr id="row_0_2_0_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_0_2_" class="arrow" onclick="toggleFolder('0_2_0_2_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1thread_1_1LinearCombinationClamp.html" target="_self">LinearCombinationClamp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_2_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationClamp_1_1Params.html" target="_self">Params</a></td><td class="desc">Host-constructable parameters structure </td></tr>
|
||||
<tr id="row_0_2_0_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_0_3_" class="arrow" onclick="toggleFolder('0_2_0_3_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu.html" target="_self">LinearCombinationRelu</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_3_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_1_1Params.html" target="_self">Params</a></td><td class="desc">Host-constructable parameters structure </td></tr>
|
||||
<tr id="row_0_2_0_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_0_4_" class="arrow" onclick="toggleFolder('0_2_0_4_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_3_01ElementOutput___00_01Count_00_01int_00_01float_00_01Round_01_4.html" target="_self">LinearCombinationRelu< ElementOutput_, Count, int, float, Round ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_4_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_3_01ElementOutput___00_01Count_00_00274a94522c46cd041d0b10d484e2ef3.html" target="_self">Params</a></td><td class="desc">Host-constructable parameters structure </td></tr>
|
||||
<tr id="row_0_2_0_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_0_5_" class="arrow" onclick="toggleFolder('0_2_0_5_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1thread_1_1ReductionOpPlus.html" target="_self">ReductionOpPlus</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_0_5_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1thread_1_1ReductionOpPlus_1_1Params.html" target="_self">Params</a></td><td class="desc">Host-constructable parameters structure </td></tr>
|
||||
<tr id="row_0_2_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_2_1_" class="arrow" onclick="toggleFolder('0_2_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1epilogue_1_1threadblock.html" target="_self">threadblock</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_0_" class="arrow" onclick="toggleFolder('0_2_1_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1epilogue_1_1threadblock_1_1detail.html" target="_self">detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement.html" target="_self">RowArrangement</a></td><td class="desc"><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement.html" title="RowArrangement determines how one or more warps cover a region of consecutive rows. ">RowArrangement</a> determines how one or more warps cover a region of consecutive rows </td></tr>
|
||||
<tr id="row_0_2_1_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement_3_01Shape_00_01WarpsRemaini91159e6f7e123d881e3ec45101fa4f81.html" target="_self">RowArrangement< Shape, WarpsRemaining, ElementsPerAccess, ElementSize, false ></a></td><td class="desc"><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement.html" title="RowArrangement determines how one or more warps cover a region of consecutive rows. ">RowArrangement</a> in which each warp's access is a 1D tiled arrangement </td></tr>
|
||||
<tr id="row_0_2_1_0_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_2_1_0_2_" class="arrow" onclick="toggleFolder('0_2_1_0_2_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement_3_01Shape_00_01WarpsRemaini6d8790249bf12cac580da73bb37eb791.html" target="_self">RowArrangement< Shape, WarpsRemaining, ElementsPerAccess, ElementSize, true ></a></td><td class="desc"><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement.html" title="RowArrangement determines how one or more warps cover a region of consecutive rows. ">RowArrangement</a> in which each warp's access is a 2D tiled arrangement </td></tr>
|
||||
<tr id="row_0_2_1_0_2_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1detail_1_1RowArrangement_3_01Shape_00_01WarpsRemainief28e98b3f284469f271d28aba73de2e.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultEpilogueComplexTensorOp.html" target="_self">DefaultEpilogueComplexTensorOp</a></td><td class="desc">Defines sensible defaults for epilogues for TensorOps </td></tr>
|
||||
<tr id="row_0_2_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultEpilogueSimt.html" target="_self">DefaultEpilogueSimt</a></td><td class="desc">Defines sensible defaults for epilogues for SimtOps </td></tr>
|
||||
<tr id="row_0_2_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultEpilogueTensorOp.html" target="_self">DefaultEpilogueTensorOp</a></td><td class="desc">Defines sensible defaults for epilogues for TensorOps </td></tr>
|
||||
<tr id="row_0_2_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultEpilogueVoltaTensorOp.html" target="_self">DefaultEpilogueVoltaTensorOp</a></td><td class="desc">Defines sensible defaults for epilogues for TensorOps </td></tr>
|
||||
<tr id="row_0_2_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultEpilogueWmmaTensorOp.html" target="_self">DefaultEpilogueWmmaTensorOp</a></td><td class="desc">Defines sensible defaults for epilogues for WMMA TensorOps </td></tr>
|
||||
<tr id="row_0_2_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultInterleavedEpilogueTensorOp.html" target="_self">DefaultInterleavedEpilogueTensorOp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_7_" class="arrow" onclick="toggleFolder('0_2_1_7_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultInterleavedThreadMapTensorOp.html" target="_self">DefaultInterleavedThreadMapTensorOp</a></td><td class="desc">Defines the optimal thread map for TensorOp accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_7_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultInterleavedThreadMapTensorOp_1_1Detail.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_8_" class="arrow" onclick="toggleFolder('0_2_1_8_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapSimt.html" target="_self">DefaultThreadMapSimt</a></td><td class="desc">Defines the optimal thread map for SIMT accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_8_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapSimt_1_1Detail.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_9_" class="arrow" onclick="toggleFolder('0_2_1_9_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapTensorOp.html" target="_self">DefaultThreadMapTensorOp</a></td><td class="desc">Defines the optimal thread map for TensorOp accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_9_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapTensorOp_1_1Detail.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapVoltaTensorOp.html" target="_self">DefaultThreadMapVoltaTensorOp</a></td><td class="desc">Defines the optimal thread map for TensorOp accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_11_" class="arrow" onclick="toggleFolder('0_2_1_11_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapVoltaTensorOp_3_01ThreadblockShape__95db04b7b72e34283958bd7fbf851d16.html" target="_self">DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float ></a></td><td class="desc">Defines the optimal thread map for TensorOp accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_11_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapVoltaTensorOp_3_01ThreadblockShape__52116c60c62f0fd520071558e42b814f.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_12_" class="arrow" onclick="toggleFolder('0_2_1_12_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapVoltaTensorOp_3_01ThreadblockShape__d58c94abc36b7c5c109b55202c6992e7.html" target="_self">DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t ></a></td><td class="desc">Defines the optimal thread map for TensorOp accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_12_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapVoltaTensorOp_3_01ThreadblockShape__4433cc988100e98097a748d2670fb0fc.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_13_" class="arrow" onclick="toggleFolder('0_2_1_13_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapWmmaTensorOp.html" target="_self">DefaultThreadMapWmmaTensorOp</a></td><td class="desc">Defines the optimal thread map for Wmma TensorOp accumulator layouts </td></tr>
|
||||
<tr id="row_0_2_1_13_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DefaultThreadMapWmmaTensorOp_1_1Detail.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_14_" class="arrow" onclick="toggleFolder('0_2_1_14_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp.html" target="_self">DirectEpilogueTensorOp</a></td><td class="desc"><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue.html" title="Epilogue operator without splitk. ">Epilogue</a> operator </td></tr>
|
||||
<tr id="row_0_2_1_14_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure for host-constructible state </td></tr>
|
||||
<tr id="row_0_2_1_14_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared storage allocation needed by the epilogue </td></tr>
|
||||
<tr id="row_0_2_1_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue.html" target="_self">Epilogue</a></td><td class="desc"><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue.html" title="Epilogue operator without splitk. ">Epilogue</a> operator without splitk </td></tr>
|
||||
<tr id="row_0_2_1_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_16_" class="arrow" onclick="toggleFolder('0_2_1_16_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1EpilogueBase.html" target="_self">EpilogueBase</a></td><td class="desc">Base class for epilogues defining warp-level </td></tr>
|
||||
<tr id="row_0_2_1_16_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1EpilogueBase_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared storage allocation needed by the epilogue </td></tr>
|
||||
<tr id="row_0_2_1_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_17_" class="arrow" onclick="toggleFolder('0_2_1_17_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1InterleavedEpilogue.html" target="_self">InterleavedEpilogue</a></td><td class="desc"><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue.html" title="Epilogue operator without splitk. ">Epilogue</a> operator without splitk </td></tr>
|
||||
<tr id="row_0_2_1_17_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedEpilogue_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared storage allocation needed by the epilogue </td></tr>
|
||||
<tr id="row_0_2_1_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_18_" class="arrow" onclick="toggleFolder('0_2_1_18_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedOutputTileThreadMap.html" target="_self">InterleavedOutputTileThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_18_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedOutputTileThreadMap_1_1Detail.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_19_" class="arrow" onclick="toggleFolder('0_2_1_19_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator.html" target="_self">InterleavedPredicatedTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_19_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator_1_1Mask.html" target="_self">Mask</a></td><td class="desc"><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator_1_1Mask.html" title="Mask object. ">Mask</a> object </td></tr>
|
||||
<tr id="row_0_2_1_19_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator_1_1Params.html" target="_self">Params</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_20_" class="arrow" onclick="toggleFolder('0_2_1_20_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1OutputTileOptimalThreadMap.html" target="_self">OutputTileOptimalThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_20_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1OutputTileOptimalThreadMap_1_1CompactedThreadMap.html" target="_self">CompactedThreadMap</a></td><td class="desc">Compacted thread map in which the 4D region is contiguous </td></tr>
|
||||
<tr id="row_0_2_1_20_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1OutputTileOptimalThreadMap_1_1Detail.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1OutputTileShape.html" target="_self">OutputTileShape</a></td><td class="desc">Tuple defining point in output tile </td></tr>
|
||||
<tr id="row_0_2_1_22_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1OutputTileThreadMap.html" target="_self">OutputTileThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_1_23_" class="arrow" onclick="toggleFolder('0_2_1_23_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator.html" target="_self">PredicatedTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_23_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Mask.html" target="_self">Mask</a></td><td class="desc"><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Mask.html" title="Mask object. ">Mask</a> object </td></tr>
|
||||
<tr id="row_0_2_1_23_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Params.html" target="_self">Params</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_1_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1threadblock_1_1SharedLoadIterator.html" target="_self">SharedLoadIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_2_2_" class="arrow" onclick="toggleFolder('0_2_2_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1epilogue_1_1warp.html" target="_self">warp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorComplexTensorOp.html" target="_self">FragmentIteratorComplexTensorOp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorComplexTensorOp_3_01WarpShape___00_01Operato8cf03c624cf3210c71b7cbd580b080f8.html" target="_self">FragmentIteratorComplexTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major shared memory </td></tr>
|
||||
<tr id="row_0_2_2_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorSimt.html" target="_self">FragmentIteratorSimt</a></td><td class="desc">Fragment iterator for SIMT accumulator arrangements </td></tr>
|
||||
<tr id="row_0_2_2_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorSimt_3_01WarpShape___00_01Operator___00_01la3f2abc523201c1b0228df99119ab88e1.html" target="_self">FragmentIteratorSimt< WarpShape_, Operator_, layout::RowMajor, MmaSimtPolicy_ ></a></td><td class="desc">Partial specialization for row-major shared memory </td></tr>
|
||||
<tr id="row_0_2_2_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorTensorOp.html" target="_self">FragmentIteratorTensorOp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorTensorOp_3_01WarpShape___00_01OperatorShape_e459aab140a2ce78336e584f95886726.html" target="_self">FragmentIteratorTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::ColumnMajorInterleaved< InterleavedK > ></a></td><td class="desc">Dedicated to interleaved layout </td></tr>
|
||||
<tr id="row_0_2_2_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorTensorOp_3_01WarpShape___00_01OperatorShape_5e78dabe303f20d76b00c600aab61eda.html" target="_self">FragmentIteratorTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major shared memory </td></tr>
|
||||
<tr id="row_0_2_2_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorVoltaTensorOp.html" target="_self">FragmentIteratorVoltaTensorOp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1Gdb805a2dc5571ac3b66e0fe6ffdcede2.html" target="_self">FragmentIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major shared memory </td></tr>
|
||||
<tr id="row_0_2_2_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1G16e08718cffa0989cce3fe8dbc4b075b.html" target="_self">FragmentIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major shared memory </td></tr>
|
||||
<tr id="row_0_2_2_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorWmmaTensorOp.html" target="_self">FragmentIteratorWmmaTensorOp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorWmmaTensorOp_3_01WarpShape___00_01OperatorShfdb1f120c6797383663f9fd11d0fc599.html" target="_self">FragmentIteratorWmmaTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major shared memory </td></tr>
|
||||
<tr id="row_0_2_2_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1SimtPolicy.html" target="_self">SimtPolicy</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1SimtPolicy_3_01WarpShape___00_01Operator___00_01layout_1_1Rcef1c60e23e997017ae176c92931151d.html" target="_self">SimtPolicy< WarpShape_, Operator_, layout::RowMajor, MmaSimtPolicy_ ></a></td><td class="desc">Partial specialization for row-major </td></tr>
|
||||
<tr id="row_0_2_2_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TensorOpPolicy.html" target="_self">TensorOpPolicy</a></td><td class="desc">Policy details related to the epilogue </td></tr>
|
||||
<tr id="row_0_2_2_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TensorOpPolicy_3_01WarpShape_00_01OperatorShape_00_01layout69549d10c3610d943987eb90e827bc05.html" target="_self">TensorOpPolicy< WarpShape, OperatorShape, layout::ColumnMajorInterleaved< InterleavedK > ></a></td><td class="desc">Partial specialization for column-major-interleaved </td></tr>
|
||||
<tr id="row_0_2_2_16_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TensorOpPolicy_3_01WarpShape_00_01OperatorShape_00_01layout_1_1RowMajor_01_4.html" target="_self">TensorOpPolicy< WarpShape, OperatorShape, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major </td></tr>
|
||||
<tr id="row_0_2_2_17_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorSimt.html" target="_self">TileIteratorSimt</a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_18_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorSimt_3_01WarpShape___00_01Operator___00_01Elemenf2bd262ed3e202b25d5802d83965bf3b.html" target="_self">TileIteratorSimt< WarpShape_, Operator_, Element_, layout::RowMajor, MmaSimtPolicy_ ></a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_19_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorTensorOp.html" target="_self">TileIteratorTensorOp</a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_2_20_" class="arrow" onclick="toggleFolder('0_2_2_20_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorTensorOp_3_01WarpShape___00_01OperatorShape___003cbb32beb84b4984cb7853662096d289.html" target="_self">TileIteratorTensorOp< WarpShape_, OperatorShape_, Element_, layout::RowMajor ></a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_20_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TileIteratorTensorOp_3_01WarpShape___00_01OperatorShape___05f11e023c9e6ee5f7a888fa4c5bbf6d1.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TileIteratorVoltaTensorOp.html" target="_self">TileIteratorVoltaTensorOp</a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_2_22_" class="arrow" onclick="toggleFolder('0_2_2_22_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1GemmS2fe0c60b727c738c622c18fc3dd76644.html" target="_self">TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor ></a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_22_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TileIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1Gemm770cbca45441d295d5d7433e8222a700.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_2_2_23_" class="arrow" onclick="toggleFolder('0_2_2_23_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1GemmSa0ceeeddc22575876eb977da7f5416a8.html" target="_self">TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor ></a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_23_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1TileIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1Gemmffcab2297c8de8d0013602a39c525b78.html" target="_self">Detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_2_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorWmmaTensorOp.html" target="_self">TileIteratorWmmaTensorOp</a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_25_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1warp_1_1TileIteratorWmmaTensorOp_3_01WarpShape___00_01OperatorShape_fd6a91cd8bbd07ecd1344326b830e3a4.html" target="_self">TileIteratorWmmaTensorOp< WarpShape_, OperatorShape_, OperatorFragment_, layout::RowMajor ></a></td><td class="desc">Template for reading and writing tiles of accumulators to shared memory </td></tr>
|
||||
<tr id="row_0_2_2_26_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1VoltaTensorOpPolicy.html" target="_self">VoltaTensorOpPolicy</a></td><td class="desc">Policy details related to the epilogue </td></tr>
|
||||
<tr id="row_0_2_2_27_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1VoltaTensorOpPolicy_3_01WarpShape___00_01gemm_1_1GemmShape_136ce744d4c1c6e8707f5a9785196194.html" target="_self">VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major </td></tr>
|
||||
<tr id="row_0_2_2_28_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1warp_1_1VoltaTensorOpPolicy_3_01WarpShape___00_01gemm_1_1GemmShape_1d48185f49e4d066f8e9327bf0856b7f.html" target="_self">VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor ></a></td><td class="desc">Partial specialization for row-major </td></tr>
|
||||
<tr id="row_0_2_3_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_2_3_" class="arrow" onclick="toggleFolder('0_2_3_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1epilogue_1_1EpilogueWorkspace.html" target="_self">EpilogueWorkspace</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_2_3_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1EpilogueWorkspace_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_2_3_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1epilogue_1_1EpilogueWorkspace_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared storage allocation needed by the epilogue </td></tr>
|
||||
<tr id="row_0_3_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_3_" class="arrow" onclick="toggleFolder('0_3_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm.html" target="_self">gemm</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_3_0_" class="arrow" onclick="toggleFolder('0_3_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1device.html" target="_self">device</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration.html" target="_self">DefaultGemmConfiguration</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassSimt_00_01ArchTag286687c5e6abe22d241f789fe344a465.html" target="_self">DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, ElementA, ElementB, ElementC, ElementAccumulator ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassSimt_00_01ArchTag3026e48abb8c905d1cc6d13d669700e4.html" target="_self">DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arc567cad318a31d04b70ea615d6321decd.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm70, ElementA, ElementB, ElementC, ElementAccumulator ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arcde61af9be1337dac1fdb210e7e7a6e01.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arc485a4f0b5a7d2d4ab2c1a24da6328048.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int4b_t, int4b_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arc8e2604a56dff3a7595da9ee0604ae55e.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int4b_t, uint4b_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arc4fada4957d463c80a2831e47f28157c4.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int8_t, int8_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arc8ab5fd2693c6a6ec43e447acb07f784c.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int8_t, uint8_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arcffcf31256aed23d4d8d0eab627bc0cad.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint4b_t, int4b_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arcb2e258b7bd321c633dd65d3ebcf6414a.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint4b_t, uint4b_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arcb27bf218007928652d5b803193eab473.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, int8_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassTensorOp_00_01arcfea0f3503156e8e3fba6456f0cedafdd.html" target="_self">DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1DefaultGemmConfiguration_3_01arch_1_1OpClassWmmaTensorOp_00_0884059ecad03bea3e86c4cf722226097.html" target="_self">DefaultGemmConfiguration< arch::OpClassWmmaTensorOp, ArchTag, ElementA, ElementB, ElementC, ElementAccumulator ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_14_" class="arrow" onclick="toggleFolder('0_3_0_14_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1Gemm.html" target="_self">Gemm</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_14_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1Gemm_1_1Arguments.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_15_" class="arrow" onclick="toggleFolder('0_3_0_15_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1Gemm_3_01ElementA___00_01LayoutA___00_01ElementB___00_01Layout4d0960ae6b1d1bf19e6239dbd002249c.html" target="_self">Gemm< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, SplitKSerial, Operator_, IsBetaZero ></a></td><td class="desc">Partial specialization for column-major output exchanges problem size and operand </td></tr>
|
||||
<tr id="row_0_3_0_15_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1Gemm_3_01ElementA___00_01LayoutA___00_01ElementB___00_01Layou1b211cc9c97c022d8fe10f2dd32c8709.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_16_" class="arrow" onclick="toggleFolder('0_3_0_16_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmBatched.html" target="_self">GemmBatched</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_16_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmBatched_1_1Arguments.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_17_" class="arrow" onclick="toggleFolder('0_3_0_17_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmBatched_3_01ElementA___00_01LayoutA___00_01ElementB___00_0c9bb6f4463ab6085e6008b5d5ad6abfd.html" target="_self">GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ ></a></td><td class="desc">Partial specialization for column-major output exchanges problem size and operand </td></tr>
|
||||
<tr id="row_0_3_0_17_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmBatched_3_01ElementA___00_01LayoutA___00_01ElementB___00_213d78696663f4231cd52c6a277c60e5.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_18_" class="arrow" onclick="toggleFolder('0_3_0_18_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmComplex.html" target="_self">GemmComplex</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_18_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmComplex_1_1Arguments.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_19_" class="arrow" onclick="toggleFolder('0_3_0_19_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmComplex_3_01ElementA___00_01LayoutA___00_01ElementB___00_07c56401b4df75709ae636675d9980a9a.html" target="_self">GemmComplex< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, TransformA, TransformB, SplitKSerial ></a></td><td class="desc">Partial specialization for column-major output exchanges problem size and operand </td></tr>
|
||||
<tr id="row_0_3_0_19_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmComplex_3_01ElementA___00_01LayoutA___00_01ElementB___00_a3923967cafb5cb9774c320dc24baa77.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_20_" class="arrow" onclick="toggleFolder('0_3_0_20_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html" target="_self">GemmSplitKParallel</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_0_20_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_0_21_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_0_21_" class="arrow" onclick="toggleFolder('0_3_0_21_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_3_01ElementA___00_01LayoutA___00_01ElementBbe7c1f7154ad5b5bf9d4d28301e2b457.html" target="_self">GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ ></a></td><td class="desc">Partial specialization for column-major output </td></tr>
|
||||
<tr id="row_0_3_0_21_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_3_01ElementA___00_01LayoutA___00_01Elementafcb1aeaf2035a7ac769d7acc233423b.html" target="_self">Arguments</a></td><td class="desc">Argument structure </td></tr>
|
||||
<tr id="row_0_3_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_3_1_" class="arrow" onclick="toggleFolder('0_3_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1kernel.html" target="_self">kernel</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_1_0_" class="arrow" onclick="toggleFolder('0_3_1_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1kernel_1_1detail.html" target="_self">detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1detail_1_1GemvBatchedStridedEpilogueScaling.html" target="_self">GemvBatchedStridedEpilogueScaling</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemm.html" target="_self">DefaultGemm</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemm_3_01ElementA_00_01layout_1_1ColumnMajorInterleave661fe54d13cc2c9153dcdf31e4beaa30.html" target="_self">DefaultGemm< ElementA, layout::ColumnMajorInterleaved< InterleavedK >, kAlignmentA, ElementB, layout::RowMajorInterleaved< InterleavedK >, kAlignmentB, ElementC, layout::ColumnMajorInterleaved< InterleavedK >, int32_t, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, IsBetaZero ></a></td><td class="desc">Partial specialization for Turing Integer Matrix Multiply Interleaved layout </td></tr>
|
||||
<tr id="row_0_3_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemm_3_01ElementA_00_01LayoutA_00_01kAlignmentA_00_01Edd80343e6570718ed237122e4ebf7fb5.html" target="_self">DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 1 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator ></a></td><td class="desc">Partial specialization for SIMT </td></tr>
|
||||
<tr id="row_0_3_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemm_3_01ElementA_00_01LayoutA_00_01kAlignmentA_00_01E044b039b2fe402f29b04a9f5feee5342.html" target="_self">DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm70, ThreadblockShape, WarpShape, GemmShape< 8, 8, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator ></a></td><td class="desc">Partial specialization for Volta architecture </td></tr>
|
||||
<tr id="row_0_3_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemm_3_01ElementA_00_01LayoutA_00_01kAlignmentA_00_01E5d78d37a9ae2ec08d7d477d571df036e.html" target="_self">DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator ></a></td><td class="desc">Partial specialization for Turing Architecture </td></tr>
|
||||
<tr id="row_0_3_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemm_3_01int8__t_00_01LayoutA_00_01kAlignmentA_00_01inf48440732c1c5f42ddbfaba179861815.html" target="_self">DefaultGemm< int8_t, LayoutA, kAlignmentA, int8_t, LayoutB, kAlignmentB, ElementC, LayoutC, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, false ></a></td><td class="desc">Partial specialization for SIMT DP4A </td></tr>
|
||||
<tr id="row_0_3_1_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemmSplitKParallel.html" target="_self">DefaultGemmSplitKParallel</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemv.html" target="_self">DefaultGemv</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_1_9_" class="arrow" onclick="toggleFolder('0_3_1_9_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1Gemm.html" target="_self">Gemm</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_9_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1Gemm_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_3_1_9_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="unioncutlass_1_1gemm_1_1kernel_1_1Gemm_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared memory storage structure </td></tr>
|
||||
<tr id="row_0_3_1_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_1_10_" class="arrow" onclick="toggleFolder('0_3_1_10_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1GemmBatched.html" target="_self">GemmBatched</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_10_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1GemmBatched_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_3_1_10_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="unioncutlass_1_1gemm_1_1kernel_1_1GemmBatched_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared memory storage structure </td></tr>
|
||||
<tr id="row_0_3_1_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_1_11_" class="arrow" onclick="toggleFolder('0_3_1_11_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1GemmSplitKParallel.html" target="_self">GemmSplitKParallel</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_1_11_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1GemmSplitKParallel_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_3_1_11_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="unioncutlass_1_1gemm_1_1kernel_1_1GemmSplitKParallel_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared memory storage structure </td></tr>
|
||||
<tr id="row_0_3_2_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_3_2_" class="arrow" onclick="toggleFolder('0_3_2_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1thread.html" target="_self">thread</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_2_0_" class="arrow" onclick="toggleFolder('0_3_2_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1thread_1_1detail.html" target="_self">detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1EnableMma__Crow__SM60.html" target="_self">EnableMma_Crow_SM60</a></td><td class="desc">Determines whether to enable thread::Gemm<> specializations compatible with SM50 </td></tr>
|
||||
<tr id="row_0_3_2_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2.html" target="_self">Mma_HFMA2</a></td><td class="desc">Structure to compute the matrix product for HFMA </td></tr>
|
||||
<tr id="row_0_3_2_0_2_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1ColumnMajor_00_72621f7ab9ae4a4ba4fe9725cf8e89c1.html" target="_self">Mma_HFMA2< Shape, layout::ColumnMajor, layout::ColumnMajor, layout::ColumnMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_3_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1ColumnMajor_00_94c813e3bbfb6f9857c155166f772687.html" target="_self">Mma_HFMA2< Shape, layout::ColumnMajor, layout::ColumnMajor, layout::RowMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_4_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1ColumnMajor_00_17070298bc4cced0a1b98aee2bb6b455.html" target="_self">Mma_HFMA2< Shape, layout::ColumnMajor, layout::RowMajor, layout::ColumnMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_5_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1ColumnMajor_00_bf6d29bb09a025e7b96942809743e28a.html" target="_self">Mma_HFMA2< Shape, layout::ColumnMajor, layout::RowMajor, layout::RowMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_6_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1RowMajor_00_01l26a133b13650c1d058273e3649f60f04.html" target="_self">Mma_HFMA2< Shape, layout::RowMajor, layout::ColumnMajor, layout::ColumnMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_7_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1RowMajor_00_01lbba3a796be96a0276693ef6b259ecc4a.html" target="_self">Mma_HFMA2< Shape, layout::RowMajor, layout::ColumnMajor, layout::RowMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_8_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1RowMajor_00_01l2aa4d2fd2e940e0d0cf7c47bc8f6017c.html" target="_self">Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::ColumnMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_9_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01layout_1_1RowMajor_00_01l086c058a15d6c79558e4f3d9ff1dc148.html" target="_self">Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::RowMajor, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_10_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01LayoutA_00_01LayoutB_00_0e1104c65871c539155bd3a0c7631928b.html" target="_self">Mma_HFMA2< Shape, LayoutA, LayoutB, layout::ColumnMajor, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_0_11_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1detail_1_1Mma__HFMA2_3_01Shape_00_01LayoutA_00_01LayoutB_00_07ac147cb320ee0d28ff8e78eb4cd330e.html" target="_self">Mma_HFMA2< Shape, LayoutA, LayoutB, layout::RowMajor, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_2_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1Mma.html" target="_self">Mma</a></td><td class="desc">Structure to compute the matrix product </td></tr>
|
||||
<tr id="row_0_3_2_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape___00_01ElementA___00_01LayoutA___00_01ElementB_e41c1cd6078b6d1347fac239b0639d56.html" target="_self">Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool ></a></td><td class="desc">Template that handles conventional layouts for FFMA and DFMA GEMM </td></tr>
|
||||
<tr id="row_0_3_2_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape___00_01half__t_00_01LayoutA_00_01half__t_00_01L066c9d2371712cdf0cac099ca9bcc578.html" target="_self">Mma< Shape_, half_t, LayoutA, half_t, LayoutB, half_t, LayoutC, arch::OpMultiplyAdd ></a></td><td class="desc">Structure to compute the matrix product </td></tr>
|
||||
<tr id="row_0_3_2_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape___00_01half__t_00_01LayoutA___00_01half__t_00_088f0e99e501b6012297eb30b4e89bcea.html" target="_self">Mma< Shape_, half_t, LayoutA_, half_t, LayoutB_, half_t, layout::RowMajor, arch::OpMultiplyAdd, typename platform::enable_if< detail::EnableMma_Crow_SM60< LayoutA_, LayoutB_ >::value >::type ></a></td><td class="desc">Computes matrix product when C is row-major </td></tr>
|
||||
<tr id="row_0_3_2_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape___00_01int8__t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html" target="_self">Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t ></a></td><td class="desc">Template that handles conventional layouts for IDP4A </td></tr>
|
||||
<tr id="row_0_3_2_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape___00_01int8__t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html" target="_self">Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool ></a></td><td class="desc">Template that handles conventional layouts for IDP4A </td></tr>
|
||||
<tr id="row_0_3_2_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1thread_1_1MmaGeneric.html" target="_self">MmaGeneric</a></td><td class="desc">Template that handles all packed matrix layouts </td></tr>
|
||||
<tr id="row_0_3_3_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_3_3_" class="arrow" onclick="toggleFolder('0_3_3_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1threadblock.html" target="_self">threadblock</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultGemvCore.html" target="_self">DefaultGemvCore</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMma.html" target="_self">DefaultMma</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMma_3_01ElementA_00_01LayoutA_00_01kAlignmentA_0010764e1fd5a3251a57eddafbd83eab8e.html" target="_self">DefaultMma< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, layout::ColumnMajorInterleaved< InterleavedK >, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, 2, Operator, true ></a></td><td class="desc">Specialization for column-major-interleaved output </td></tr>
|
||||
<tr id="row_0_3_3_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMma_3_01ElementA_00_01LayoutA_00_01kAlignmentA_00c67c16f9881e4f2fda76d8ed83ebabd6.html" target="_self">DefaultMma< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, layout::RowMajor, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, InstructionShape, 2, Operator, false ></a></td><td class="desc">Specialization for row-major output (OperatorClass Simt) </td></tr>
|
||||
<tr id="row_0_3_3_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMma_3_01ElementA_00_01LayoutA_00_01kAlignmentA_00ce36642cae579bce6605ff8edde3c6ab.html" target="_self">DefaultMma< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, layout::RowMajor, arch::OpClassTensorOp, ArchTag, ThreadblockShape, WarpShape, InstructionShape, 2, Operator, false ></a></td><td class="desc">Specialization for row-major output (OperatorClass TensorOp) </td></tr>
|
||||
<tr id="row_0_3_3_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMma_3_01int8__t_00_01LayoutA_00_01kAlignmentA_00_07e7230d4011ada5e22cfcb29103b696.html" target="_self">DefaultMma< int8_t, LayoutA, kAlignmentA, int8_t, LayoutB, kAlignmentB, ElementAccumulator, layout::RowMajor, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 4 >, 2, Operator, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore.html" target="_self">DefaultMmaCore</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmShab94a11a77dd0565102710907089acee0.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmShafafd5c61db86cbfe90863578ddd11092.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha46446d1e3871e31d2e728f710d78c8c1.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha8da7a0cfbbe859b701fdd9f2b8566aa7.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha84e9f8afb6a4ca9f5dcd219b182d16e7.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha2c0d0b7cdb5c4bcb11e83c058eb65345.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc">Partial specialization: </td></tr>
|
||||
<tr id="row_0_3_3_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha34a52cc7b2942e8c290f0032b6779b52.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmShaaf312aafe9da92ea9d417bcc12a8e7dc.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc">Partial specialization: </td></tr>
|
||||
<tr id="row_0_3_3_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha863d4139ccaa713bc4bde32c425f4067.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ ></a></td><td class="desc">Partial specialization: </td></tr>
|
||||
<tr id="row_0_3_3_16_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmShaf03a122202ad10acdc96f280106d678b.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_17_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha69bef08ea63dd930f99d9788105873dd.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_18_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmSha3adf608332a8c9ee7014fced0da8a9ca.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_19_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01GemmShab7edfba3cdf43a07e3c4d719d87565a4.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_20_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01Instruc803d38bc1e4618c07c47f54c87ae2678.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01Instrucf60fe02fcdd80d28b7fd419133465dcc.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_22_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01Instruc2bf00737f4ad0a9da9a8be6d3e66c152.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_23_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01Instruc24092ddc01fc83dabb7db4c14880fe60.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1DefaultMmaCore_3_01Shape___00_01WarpShape___00_01Instruc4fee9f2965b8468bfb42b94a74527d22.html" target="_self">DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_25_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1GemmBatchedIdentityThreadblockSwizzle.html" target="_self">GemmBatchedIdentityThreadblockSwizzle</a></td><td class="desc">Threadblock swizzling function for batched GEMMs </td></tr>
|
||||
<tr id="row_0_3_3_26_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1GemmHorizontalThreadblockSwizzle.html" target="_self">GemmHorizontalThreadblockSwizzle</a></td><td class="desc">Threadblock swizzling function for GEMMs </td></tr>
|
||||
<tr id="row_0_3_3_27_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1GemmIdentityThreadblockSwizzle.html" target="_self">GemmIdentityThreadblockSwizzle</a></td><td class="desc">Threadblock swizzling function for GEMMs </td></tr>
|
||||
<tr id="row_0_3_3_28_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1GemmSplitKHorizontalThreadblockSwizzle.html" target="_self">GemmSplitKHorizontalThreadblockSwizzle</a></td><td class="desc">Threadblock swizzling function for split-K GEMMs </td></tr>
|
||||
<tr id="row_0_3_3_29_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1GemmSplitKIdentityThreadblockSwizzle.html" target="_self">GemmSplitKIdentityThreadblockSwizzle</a></td><td class="desc">Threadblock swizzling function for split-K GEMMs </td></tr>
|
||||
<tr id="row_0_3_3_30_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1threadblock_1_1Gemv.html" target="_self">Gemv</a></td><td class="desc">Structure to compute the matrix-vector product using SIMT math instructions </td></tr>
|
||||
<tr id="row_0_3_3_31_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1GemvBatchedStridedThreadblockDefaultSwizzle.html" target="_self">GemvBatchedStridedThreadblockDefaultSwizzle</a></td><td class="desc">Threadblock swizzling function for batched GEMVs </td></tr>
|
||||
<tr id="row_0_3_3_32_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_3_32_" class="arrow" onclick="toggleFolder('0_3_3_32_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1threadblock_1_1MmaBase.html" target="_self">MmaBase</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_3_32_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1threadblock_1_1MmaBase_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc">Shared storage object needed by threadblock-scoped GEMM </td></tr>
|
||||
<tr id="row_0_3_3_33_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1threadblock_1_1MmaPipelined.html" target="_self">MmaPipelined</a></td><td class="desc">Structure to compute the matrix product targeting CUDA cores and SIMT math instructions </td></tr>
|
||||
<tr id="row_0_3_3_34_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1threadblock_1_1MmaPolicy.html" target="_self">MmaPolicy</a></td><td class="desc">Policy object describing MmaTensorOp </td></tr>
|
||||
<tr id="row_0_3_3_35_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1threadblock_1_1MmaSingleStage.html" target="_self">MmaSingleStage</a></td><td class="desc">Structure to compute the matrix product targeting CUDA cores and SIMT math instructions </td></tr>
|
||||
<tr id="row_0_3_4_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_3_4_" class="arrow" onclick="toggleFolder('0_3_4_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1gemm_1_1warp.html" target="_self">warp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1DefaultMmaTensorOp.html" target="_self">DefaultMmaTensorOp</a></td><td class="desc">Partial specialization for m-by-n-by-kgroup </td></tr>
|
||||
<tr id="row_0_3_4_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaComplexTensorOp.html" target="_self">MmaComplexTensorOp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaComplexTensorOp_3_01Shape___00_01complex_3_01RealElementA_01_146441010dad1f40eb51b6dae3ded216.html" target="_self">MmaComplexTensorOp< Shape_, complex< RealElementA >, LayoutA_, complex< RealElementB >, LayoutB_, complex< RealElementC >, LayoutC_, Policy_, TransformA, TransformB, Enable ></a></td><td class="desc">Partial specialization for complex*complex+complex => complex using real-valued TensorOps </td></tr>
|
||||
<tr id="row_0_3_4_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimt.html" target="_self">MmaSimt</a></td><td class="desc">Structure to compute the matrix product targeting CUDA cores and SIMT math instructions </td></tr>
|
||||
<tr id="row_0_3_4_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaSimtPolicy.html" target="_self">MmaSimtPolicy</a></td><td class="desc">Describes the arrangement and configuration of per-lane operations in warp-level matrix multiply </td></tr>
|
||||
<tr id="row_0_3_4_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator.html" target="_self">MmaSimtTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator_3_01Shape___00_01Operand_1_1kA_00_01Element_67ca7e11a38e38f2c51b84767654a90f.html" target="_self">MmaSimtTileIterator< Shape_, Operand::kA, Element_, layout::ColumnMajor, Policy_, PartitionsK, PartitionGroupSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator_3_01Shape___00_01Operand_1_1kA_00_01Element_f0ce904a9294556f15e1cc9cf7c99a93.html" target="_self">MmaSimtTileIterator< Shape_, Operand::kA, Element_, layout::ColumnMajorInterleaved< 4 >, Policy_, PartitionsK, PartitionGroupSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator_3_01Shape___00_01Operand_1_1kB_00_01Element_ea0a4e7ce3cd5d25cabf79383efdf4d9.html" target="_self">MmaSimtTileIterator< Shape_, Operand::kB, Element_, layout::RowMajor, Policy_, PartitionsK, PartitionGroupSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator_3_01Shape___00_01Operand_1_1kB_00_01Element_ada156b62fcbdce47009c5bf1321c92c.html" target="_self">MmaSimtTileIterator< Shape_, Operand::kB, Element_, layout::RowMajorInterleaved< 4 >, Policy_, PartitionsK, PartitionGroupSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator_3_01Shape___00_01Operand_1_1kC_00_01Element_4ccafbc821b3a55cd532602442a74031.html" target="_self">MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaSimtTileIterator_3_01Shape___00_01Operand_1_1kC_00_01Element_8f92ea79e85febb67169c4b2d94b1b20.html" target="_self">MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::RowMajor, Policy_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOp.html" target="_self">MmaTensorOp</a></td><td class="desc">Structure to compute the matrix product targeting CUDA cores and SIMT math instructions </td></tr>
|
||||
<tr id="row_0_3_4_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator.html" target="_self">MmaTensorOpAccumulatorTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_14_" class="arrow" onclick="toggleFolder('0_3_4_14_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator_3_01Shape___00_01Element___008f607b871a2b3d854eb4def64712c042.html" target="_self">MmaTensorOpAccumulatorTileIterator< Shape_, Element_, cutlass::layout::ColumnMajor, InstructionShape_, OpDelta_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_14_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator_3_01Shape___00_01Element___0d35fa5dc4e4b4f72784c943fd857fc1d.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_15_" class="arrow" onclick="toggleFolder('0_3_4_15_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator_3_01Shape___00_01Element___00027dabdc144edd6276f664ca74088510.html" target="_self">MmaTensorOpAccumulatorTileIterator< Shape_, Element_, cutlass::layout::ColumnMajorInterleaved< InterleavedN >, InstructionShape_, OpDelta_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_15_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator_3_01Shape___00_01Element___03822d9be37f3725022005a5434441f22.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_16_" class="arrow" onclick="toggleFolder('0_3_4_16_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator_3_01Shape___00_01Element___006c39f57875e0aa9d0ad82c8043ed8b98.html" target="_self">MmaTensorOpAccumulatorTileIterator< Shape_, Element_, cutlass::layout::RowMajor, InstructionShape_, OpDelta_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_16_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaTensorOpAccumulatorTileIterator_3_01Shape___00_01Element___093b5d2838ac5a742704ef62b5c8688f0.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_17_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator.html" target="_self">MmaTensorOpMultiplicandTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_18_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___0b84f53cd44b339eccc12067c9f86e11c.html" target="_self">MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, InstructionShape_, OpDelta_, 32, PartitionsK_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_19_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___0e52ad425e1ee3e68544873f66733237b.html" target="_self">MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, InstructionShape_, OpDelta_, 32, PartitionsK_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_20_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___039819fb3ccd43786d556c2c9669508ef.html" target="_self">MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::RowMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, InstructionShape_, OpDelta_, 32, PartitionsK_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___0352e0dcab42bc8360606874e00173556.html" target="_self">MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, InstructionShape_, OpDelta_, 32, PartitionsK_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_22_" class="arrow" onclick="toggleFolder('0_3_4_22_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___0ed7daaeba1c095e77f68533d4d2c475c.html" target="_self">MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::TensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, 64 >, InstructionShape_, OpDelta_, 32, PartitionsK_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_22_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___07638f8b7761f6e2e2e6918e2c05e739.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_23_" class="arrow" onclick="toggleFolder('0_3_4_23_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___0c7d419c589d601ce4eb603be566fea21.html" target="_self">MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::TensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, InstructionShape_, OpDelta_, 32, PartitionsK_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_23_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand___0784c74bd670999ec23ad8ef9dc55777.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaTensorOpPolicy.html" target="_self">MmaTensorOpPolicy</a></td><td class="desc">Policy </td></tr>
|
||||
<tr id="row_0_3_4_25_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOp.html" target="_self">MmaVoltaTensorOp</a></td><td class="desc">Structure to compute the matrix product targeting CUDA cores and SIMT math instructions </td></tr>
|
||||
<tr id="row_0_3_4_26_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_26_" class="arrow" onclick="toggleFolder('0_3_4_26_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpAccumulatorTileIterator.html" target="_self">MmaVoltaTensorOpAccumulatorTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_26_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpAccumulatorTileIterator_1_1Policy.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_27_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_28_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operan0d3248553e52cd61ed8a2b3b12a20343.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kA, Element_, cutlass::layout::ColumnMajorVoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_29_" class="arrow" onclick="toggleFolder('0_3_4_29_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operan34be8e21a40af3ebd2dc3dff460dca72.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kA, Element_, cutlass::layout::VoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_29_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Opera33cdf53848564e894d4407637dc86caf.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_30_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operand734577b7e54a074d143aba59828c2f2.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kB, Element_, cutlass::layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_31_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_31_" class="arrow" onclick="toggleFolder('0_3_4_31_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operan16c56cdc2dda5eeb996af8ec0242d501.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kB, Element_, cutlass::layout::VoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_31_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Opera6fa6d2d3725bb3ec613d5c527ea3ffe7.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_32_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operan5a221944f4a0e16ccab77ba684856942.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::ColumnMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, KBlock >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_33_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operandcc9821c435540895138bc9af495f321.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::RowMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, KBlock >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_3_4_34_" class="arrow" onclick="toggleFolder('0_3_4_34_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operana2f40b28f0d2286b84d86f7238d67b52.html" target="_self">MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::VoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, KBlock >, InstructionShape_, OpDelta_, 32 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_4_34_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1MmaVoltaTensorOpMultiplicandTileIterator_3_01Shape___00_01Operafa294175b280756dd8388f9ffe7b72c4.html" target="_self">Policy</a></td><td class="desc">Internal structure of iterator - made public to enable introspection </td></tr>
|
||||
<tr id="row_0_3_4_35_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1warp_1_1WarpSize.html" target="_self">WarpSize</a></td><td class="desc">Query the number of threads per warp </td></tr>
|
||||
<tr id="row_0_3_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1BatchedGemmCoord.html" target="_self">BatchedGemmCoord</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1GemmCoord.html" target="_self">GemmCoord</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_3_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1gemm_1_1GemmShape.html" target="_self">GemmShape</a></td><td class="desc">Shape of a matrix multiply-add operation </td></tr>
|
||||
<tr id="row_0_4_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_4_" class="arrow" onclick="toggleFolder('0_4_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1layout.html" target="_self">layout</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1ColumnMajor.html" target="_self">ColumnMajor</a></td><td class="desc">Mapping function for column-major matrices </td></tr>
|
||||
<tr id="row_0_4_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorBlockLinear.html" target="_self">ColumnMajorBlockLinear</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorInterleaved.html" target="_self">ColumnMajorInterleaved</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorTensorOpMultiplicandCongruous.html" target="_self">ColumnMajorTensorOpMultiplicandCongruous</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorTensorOpMultiplicandCrosswise.html" target="_self">ColumnMajorTensorOpMultiplicandCrosswise</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorVoltaTensorOpMultiplicandBCongruous.html" target="_self">ColumnMajorVoltaTensorOpMultiplicandBCongruous</a></td><td class="desc">Template mapping a column-major view of pitch-linear memory to <a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html" title="Template based on element size (in bits) - defined in terms of pitch-linear memory. ">VoltaTensorOpMultiplicandCongruous</a> </td></tr>
|
||||
<tr id="row_0_4_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorVoltaTensorOpMultiplicandCongruous.html" target="_self">ColumnMajorVoltaTensorOpMultiplicandCongruous</a></td><td class="desc">Template mapping a column-major view of pitch-linear memory to <a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html" title="Template based on element size (in bits) - defined in terms of pitch-linear memory. ">VoltaTensorOpMultiplicandCongruous</a> </td></tr>
|
||||
<tr id="row_0_4_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ColumnMajorVoltaTensorOpMultiplicandCrosswise.html" target="_self">ColumnMajorVoltaTensorOpMultiplicandCrosswise</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1ContiguousMatrix.html" target="_self">ContiguousMatrix</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1GeneralMatrix.html" target="_self">GeneralMatrix</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1LayoutTranspose.html" target="_self">LayoutTranspose</a></td><td class="desc">Defines transposes of matrix layouts </td></tr>
|
||||
<tr id="row_0_4_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1LayoutTranspose_3_01layout_1_1ColumnMajor_01_4.html" target="_self">LayoutTranspose< layout::ColumnMajor ></a></td><td class="desc">Transpose of column-major is row-major </td></tr>
|
||||
<tr id="row_0_4_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1LayoutTranspose_3_01layout_1_1RowMajor_01_4.html" target="_self">LayoutTranspose< layout::RowMajor ></a></td><td class="desc">Transpose of row-major is column-major </td></tr>
|
||||
<tr id="row_0_4_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1PackedVectorLayout.html" target="_self">PackedVectorLayout</a></td><td class="desc">Tensor layout for densely packed vectors </td></tr>
|
||||
<tr id="row_0_4_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1PitchLinear.html" target="_self">PitchLinear</a></td><td class="desc">Mapping function for pitch-linear memory </td></tr>
|
||||
<tr id="row_0_4_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1PitchLinearCoord.html" target="_self">PitchLinearCoord</a></td><td class="desc">Coordinate in pitch-linear space </td></tr>
|
||||
<tr id="row_0_4_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1PitchLinearShape.html" target="_self">PitchLinearShape</a></td><td class="desc">Template defining a shape used by pitch-linear operators </td></tr>
|
||||
<tr id="row_0_4_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1RowMajor.html" target="_self">RowMajor</a></td><td class="desc">Mapping function for row-major matrices </td></tr>
|
||||
<tr id="row_0_4_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorBlockLinear.html" target="_self">RowMajorBlockLinear</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorInterleaved.html" target="_self">RowMajorInterleaved</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorTensorOpMultiplicandCongruous.html" target="_self">RowMajorTensorOpMultiplicandCongruous</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_21_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorTensorOpMultiplicandCrosswise.html" target="_self">RowMajorTensorOpMultiplicandCrosswise</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorVoltaTensorOpMultiplicandBCongruous.html" target="_self">RowMajorVoltaTensorOpMultiplicandBCongruous</a></td><td class="desc">Template mapping a row-major view of pitch-linear memory to <a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html" title="Template based on element size (in bits) - defined in terms of pitch-linear memory. ">VoltaTensorOpMultiplicandCongruous</a> </td></tr>
|
||||
<tr id="row_0_4_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorVoltaTensorOpMultiplicandCongruous.html" target="_self">RowMajorVoltaTensorOpMultiplicandCongruous</a></td><td class="desc">Template mapping a row-major view of pitch-linear memory to <a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html" title="Template based on element size (in bits) - defined in terms of pitch-linear memory. ">VoltaTensorOpMultiplicandCongruous</a> </td></tr>
|
||||
<tr id="row_0_4_24_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1RowMajorVoltaTensorOpMultiplicandCrosswise.html" target="_self">RowMajorVoltaTensorOpMultiplicandCrosswise</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_25_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1TensorCxRSKx.html" target="_self">TensorCxRSKx</a></td><td class="desc">Mapping function for 4-D CxRSKx tensors </td></tr>
|
||||
<tr id="row_0_4_26_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1TensorNCHW.html" target="_self">TensorNCHW</a></td><td class="desc">Mapping function for 4-D NCHW tensors </td></tr>
|
||||
<tr id="row_0_4_27_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1TensorNCxHWx.html" target="_self">TensorNCxHWx</a></td><td class="desc">Mapping function for 4-D NC/xHWx tensors </td></tr>
|
||||
<tr id="row_0_4_28_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1layout_1_1TensorNHWC.html" target="_self">TensorNHWC</a></td><td class="desc">Mapping function for 4-D NHWC tensors </td></tr>
|
||||
<tr id="row_0_4_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1TensorOpMultiplicand.html" target="_self">TensorOpMultiplicand</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_30_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1TensorOpMultiplicandColumnMajorInterleaved.html" target="_self">TensorOpMultiplicandColumnMajorInterleaved</a></td><td class="desc">Template based on element size (in bits) - defined in terms of pitch-linear memory </td></tr>
|
||||
<tr id="row_0_4_31_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1TensorOpMultiplicandCongruous.html" target="_self">TensorOpMultiplicandCongruous</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_32_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1TensorOpMultiplicandCongruous_3_0132_00_01Crosswise_01_4.html" target="_self">TensorOpMultiplicandCongruous< 32, Crosswise ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_33_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1TensorOpMultiplicandCrosswise.html" target="_self">TensorOpMultiplicandCrosswise</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_4_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1TensorOpMultiplicandRowMajorInterleaved.html" target="_self">TensorOpMultiplicandRowMajorInterleaved</a></td><td class="desc">Template based on element size (in bits) - defined in terms of pitch-linear memory </td></tr>
|
||||
<tr id="row_0_4_35_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandBCongruous.html" target="_self">VoltaTensorOpMultiplicandBCongruous</a></td><td class="desc">Template based on element size (in bits) - defined in terms of pitch-linear memory </td></tr>
|
||||
<tr id="row_0_4_36_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html" target="_self">VoltaTensorOpMultiplicandCongruous</a></td><td class="desc">Template based on element size (in bits) - defined in terms of pitch-linear memory </td></tr>
|
||||
<tr id="row_0_4_37_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCrosswise.html" target="_self">VoltaTensorOpMultiplicandCrosswise</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_5_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_5_" class="arrow" onclick="toggleFolder('0_5_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1library.html" target="_self">library</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_5_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmArguments.html" target="_self">GemmArguments</a></td><td class="desc">Arguments for GEMM </td></tr>
|
||||
<tr id="row_0_5_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmArrayArguments.html" target="_self">GemmArrayArguments</a></td><td class="desc">Arguments for GEMM - used by all the GEMM operations </td></tr>
|
||||
<tr id="row_0_5_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmArrayConfiguration.html" target="_self">GemmArrayConfiguration</a></td><td class="desc">Configuration for batched GEMM in which multiple matrix products are computed </td></tr>
|
||||
<tr id="row_0_5_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmBatchedConfiguration.html" target="_self">GemmBatchedConfiguration</a></td><td class="desc">Configuration for batched GEMM in which multiple matrix products are computed </td></tr>
|
||||
<tr id="row_0_5_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmConfiguration.html" target="_self">GemmConfiguration</a></td><td class="desc">Configuration for basic GEMM operations </td></tr>
|
||||
<tr id="row_0_5_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmDescription.html" target="_self">GemmDescription</a></td><td class="desc">Description of all GEMM computations </td></tr>
|
||||
<tr id="row_0_5_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmPlanarComplexBatchedConfiguration.html" target="_self">GemmPlanarComplexBatchedConfiguration</a></td><td class="desc">Batched complex valued GEMM in which real and imaginary parts are separated by a stride </td></tr>
|
||||
<tr id="row_0_5_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1GemmPlanarComplexConfiguration.html" target="_self">GemmPlanarComplexConfiguration</a></td><td class="desc">Complex valued GEMM in which real and imaginary parts are separated by a stride </td></tr>
|
||||
<tr id="row_0_5_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1library_1_1Manifest.html" target="_self">Manifest</a></td><td class="desc"><a class="el" href="classcutlass_1_1library_1_1Manifest.html" title="Manifest of CUTLASS Library. ">Manifest</a> of CUTLASS Library </td></tr>
|
||||
<tr id="row_0_5_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1MathInstructionDescription.html" target="_self">MathInstructionDescription</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_5_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1library_1_1Operation.html" target="_self">Operation</a></td><td class="desc">Base class for all device-wide operations </td></tr>
|
||||
<tr id="row_0_5_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1OperationDescription.html" target="_self">OperationDescription</a></td><td class="desc">High-level description of an operation </td></tr>
|
||||
<tr id="row_0_5_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1TensorDescription.html" target="_self">TensorDescription</a></td><td class="desc">Structure describing the properties of a tensor </td></tr>
|
||||
<tr id="row_0_5_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1library_1_1TileDescription.html" target="_self">TileDescription</a></td><td class="desc">Structure describing the tiled structure of a GEMM-like computation </td></tr>
|
||||
<tr id="row_0_6_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_6_" class="arrow" onclick="toggleFolder('0_6_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1platform.html" target="_self">platform</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1aligned__chunk.html" target="_self">aligned_chunk</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1aligned__storage.html" target="_self">aligned_storage</a></td><td class="desc">Std::aligned_storage </td></tr>
|
||||
<tr id="row_0_6_2_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_6_2_" class="arrow" onclick="toggleFolder('0_6_2_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of.html" target="_self">alignment_of</a></td><td class="desc">Std::alignment_of </td></tr>
|
||||
<tr id="row_0_6_2_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_1_1pad.html" target="_self">pad</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html" target="_self">alignment_of< const value_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html" target="_self">alignment_of< const volatile value_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html" target="_self">alignment_of< double2 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html" target="_self">alignment_of< double4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html" target="_self">alignment_of< float4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html" target="_self">alignment_of< int4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html" target="_self">alignment_of< long4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html" target="_self">alignment_of< longlong2 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html" target="_self">alignment_of< longlong4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html" target="_self">alignment_of< uint4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html" target="_self">alignment_of< ulong4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html" target="_self">alignment_of< ulonglong2 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html" target="_self">alignment_of< ulonglong4 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html" target="_self">alignment_of< volatile value_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1bool__constant.html" target="_self">bool_constant</a></td><td class="desc">Std::bool_constant </td></tr>
|
||||
<tr id="row_0_6_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1conditional.html" target="_self">conditional</a></td><td class="desc">Std::conditional (true specialization) </td></tr>
|
||||
<tr id="row_0_6_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html" target="_self">conditional< false, T, F ></a></td><td class="desc">Std::conditional (false specialization) </td></tr>
|
||||
<tr id="row_0_6_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1default__delete.html" target="_self">default_delete</a></td><td class="desc">Default deleter </td></tr>
|
||||
<tr id="row_0_6_21_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html" target="_self">default_delete< T[]></a></td><td class="desc">Partial specialization for deleting array types </td></tr>
|
||||
<tr id="row_0_6_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1enable__if.html" target="_self">enable_if</a></td><td class="desc">Std::enable_if (true specialization) </td></tr>
|
||||
<tr id="row_0_6_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html" target="_self">enable_if< false, T ></a></td><td class="desc">Std::enable_if (false specialization) </td></tr>
|
||||
<tr id="row_0_6_24_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1integral__constant.html" target="_self">integral_constant</a></td><td class="desc">Std::integral_constant </td></tr>
|
||||
<tr id="row_0_6_25_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__arithmetic.html" target="_self">is_arithmetic</a></td><td class="desc">Std::is_arithmetic </td></tr>
|
||||
<tr id="row_0_6_26_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__base__of.html" target="_self">is_base_of</a></td><td class="desc">Std::is_base_of </td></tr>
|
||||
<tr id="row_0_6_27_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_6_27_" class="arrow" onclick="toggleFolder('0_6_27_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__base__of__helper.html" target="_self">is_base_of_helper</a></td><td class="desc">Helper for std::is_base_of </td></tr>
|
||||
<tr id="row_0_6_27_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html" target="_self">dummy</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_28_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__floating__point.html" target="_self">is_floating_point</a></td><td class="desc">Std::is_floating_point </td></tr>
|
||||
<tr id="row_0_6_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__fundamental.html" target="_self">is_fundamental</a></td><td class="desc">Std::is_fundamental </td></tr>
|
||||
<tr id="row_0_6_30_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral.html" target="_self">is_integral</a></td><td class="desc">Std::is_integral </td></tr>
|
||||
<tr id="row_0_6_31_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html" target="_self">is_integral< char ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_32_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html" target="_self">is_integral< const T ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_33_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html" target="_self">is_integral< const volatile T ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html" target="_self">is_integral< int ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_35_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html" target="_self">is_integral< long ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_36_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html" target="_self">is_integral< long long ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_37_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html" target="_self">is_integral< short ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_38_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html" target="_self">is_integral< signed char ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_39_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html" target="_self">is_integral< unsigned char ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_40_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html" target="_self">is_integral< unsigned int ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_41_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html" target="_self">is_integral< unsigned long ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_42_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html" target="_self">is_integral< unsigned long long ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_43_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html" target="_self">is_integral< unsigned short ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_44_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html" target="_self">is_integral< volatile T ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_45_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__pointer.html" target="_self">is_pointer</a></td><td class="desc">Std::is_pointer </td></tr>
|
||||
<tr id="row_0_6_46_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__pointer__helper.html" target="_self">is_pointer_helper</a></td><td class="desc">Helper for std::is_pointer (false specialization) </td></tr>
|
||||
<tr id="row_0_6_47_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html" target="_self">is_pointer_helper< T * ></a></td><td class="desc">Helper for std::is_pointer (true specialization) </td></tr>
|
||||
<tr id="row_0_6_48_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__same.html" target="_self">is_same</a></td><td class="desc">Std::is_same (false specialization) </td></tr>
|
||||
<tr id="row_0_6_49_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html" target="_self">is_same< A, A ></a></td><td class="desc">Std::is_same (true specialization) </td></tr>
|
||||
<tr id="row_0_6_50_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__trivially__copyable.html" target="_self">is_trivially_copyable</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_51_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__void.html" target="_self">is_void</a></td><td class="desc">Std::is_void </td></tr>
|
||||
<tr id="row_0_6_52_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__volatile.html" target="_self">is_volatile</a></td><td class="desc">Std::is_volatile </td></tr>
|
||||
<tr id="row_0_6_53_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html" target="_self">is_volatile< volatile T ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_6_54_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1nullptr__t.html" target="_self">nullptr_t</a></td><td class="desc">Std::nullptr_t </td></tr>
|
||||
<tr id="row_0_6_55_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1remove__const.html" target="_self">remove_const</a></td><td class="desc">Std::remove_const (non-const specialization) </td></tr>
|
||||
<tr id="row_0_6_56_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html" target="_self">remove_const< const T ></a></td><td class="desc">Std::remove_const (const specialization) </td></tr>
|
||||
<tr id="row_0_6_57_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1remove__cv.html" target="_self">remove_cv</a></td><td class="desc">Std::remove_cv </td></tr>
|
||||
<tr id="row_0_6_58_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1remove__volatile.html" target="_self">remove_volatile</a></td><td class="desc">Std::remove_volatile (non-volatile specialization) </td></tr>
|
||||
<tr id="row_0_6_59_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html" target="_self">remove_volatile< volatile T ></a></td><td class="desc">Std::remove_volatile (volatile specialization) </td></tr>
|
||||
<tr id="row_0_6_60_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1platform_1_1unique__ptr.html" target="_self">unique_ptr</a></td><td class="desc">Std::unique_ptr </td></tr>
|
||||
<tr id="row_0_7_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_7_" class="arrow" onclick="toggleFolder('0_7_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reduction.html" target="_self">reduction</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_7_0_" class="arrow" onclick="toggleFolder('0_7_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reduction_1_1kernel.html" target="_self">kernel</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_0_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_7_0_0_" class="arrow" onclick="toggleFolder('0_7_0_0_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK.html" target="_self">ReduceSplitK</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_0_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK_1_1Params.html" target="_self">Params</a></td><td class="desc"><a class="el" href="structcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK_1_1Params.html" title="Params structure. ">Params</a> structure </td></tr>
|
||||
<tr id="row_0_7_0_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK_1_1SharedStorage.html" target="_self">SharedStorage</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_7_1_" class="arrow" onclick="toggleFolder('0_7_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reduction_1_1thread.html" target="_self">thread</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_1_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce.html" target="_self">Reduce</a></td><td class="desc">Structure to compute the thread level reduction </td></tr>
|
||||
<tr id="row_0_7_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce_3_01plus_3_01half__t_01_4_00_01AlignedArray_3_01half__t_00_01N_01_4_01_4.html" target="_self">Reduce< plus< half_t >, AlignedArray< half_t, N > ></a></td><td class="desc">Partial specializations of <a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce.html" title="Structure to compute the thread level reduction. ">Reduce</a> for AlignedArray<half_t, N> </td></tr>
|
||||
<tr id="row_0_7_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce_3_01plus_3_01half__t_01_4_00_01Array_3_01half__t_00_01N_01_4_01_4.html" target="_self">Reduce< plus< half_t >, Array< half_t, N > ></a></td><td class="desc">Partial specializations of <a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce.html" title="Structure to compute the thread level reduction. ">Reduce</a> for Array<half_t, N> </td></tr>
|
||||
<tr id="row_0_7_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce_3_01plus_3_01T_01_4_00_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">Reduce< plus< T >, Array< T, N > ></a></td><td class="desc">Partial specialization of <a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce.html" title="Structure to compute the thread level reduction. ">Reduce</a> for Array<T, N> </td></tr>
|
||||
<tr id="row_0_7_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce_3_01plus_3_01T_01_4_00_01T_01_4.html" target="_self">Reduce< plus< T >, T ></a></td><td class="desc">Partial Specialization of <a class="el" href="structcutlass_1_1reduction_1_1thread_1_1Reduce.html" title="Structure to compute the thread level reduction. ">Reduce</a> for "plus" (a functional operator) </td></tr>
|
||||
<tr id="row_0_7_1_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_7_1_5_" class="arrow" onclick="toggleFolder('0_7_1_5_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1ReduceAdd.html" target="_self">ReduceAdd</a></td><td class="desc">Mixed-precision reduction </td></tr>
|
||||
<tr id="row_0_7_1_5_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1thread_1_1ReduceAdd_1_1Params.html" target="_self">Params</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1BatchedReduction.html" target="_self">BatchedReduction</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_3_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_7_3_" class="arrow" onclick="toggleFolder('0_7_3_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1BatchedReductionTraits.html" target="_self">BatchedReductionTraits</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_3_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1BatchedReductionTraits_1_1Params.html" target="_self">Params</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_7_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reduction_1_1DefaultBlockSwizzle.html" target="_self">DefaultBlockSwizzle</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_8_" class="arrow" onclick="toggleFolder('0_8_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference.html" target="_self">reference</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_8_0_" class="arrow" onclick="toggleFolder('0_8_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1detail.html" target="_self">detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1detail_1_1Cast.html" target="_self">Cast</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1detail_1_1Cast_3_01float_00_01int8__t_01_4.html" target="_self">Cast< float, int8_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_0_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1detail_1_1Cast_3_01float_00_01uint8__t_01_4.html" target="_self">Cast< float, uint8_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_8_1_" class="arrow" onclick="toggleFolder('0_8_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1device.html" target="_self">device</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_8_1_0_" class="arrow" onclick="toggleFolder('0_8_1_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1device_1_1detail.html" target="_self">detail</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_0_" class="arrow" onclick="toggleFolder('0_8_1_0_0_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1RandomGaussianFunc.html" target="_self">RandomGaussianFunc</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_0_0_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1RandomGaussianFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_1_" class="arrow" onclick="toggleFolder('0_8_1_0_1_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1RandomUniformFunc.html" target="_self">RandomUniformFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_1_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1RandomUniformFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_2_" class="arrow" onclick="toggleFolder('0_8_1_0_2_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorCopyDiagonalInFunc.html" target="_self">TensorCopyDiagonalInFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_2_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorCopyDiagonalInFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_3_" class="arrow" onclick="toggleFolder('0_8_1_0_3_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorCopyDiagonalOutFunc.html" target="_self">TensorCopyDiagonalOutFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_3_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorCopyDiagonalOutFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_4_" class="arrow" onclick="toggleFolder('0_8_1_0_4_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillDiagonalFunc.html" target="_self">TensorFillDiagonalFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_4_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillDiagonalFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_5_" class="arrow" onclick="toggleFolder('0_8_1_0_5_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillLinearFunc.html" target="_self">TensorFillLinearFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_5_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillLinearFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_6_" class="arrow" onclick="toggleFolder('0_8_1_0_6_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillRandomGaussianFunc.html" target="_self">TensorFillRandomGaussianFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_6_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillRandomGaussianFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_7_" class="arrow" onclick="toggleFolder('0_8_1_0_7_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillRandomUniformFunc.html" target="_self">TensorFillRandomUniformFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_7_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillRandomUniformFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_8_" class="arrow" onclick="toggleFolder('0_8_1_0_8_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorUpdateDiagonalFunc.html" target="_self">TensorUpdateDiagonalFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_8_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorUpdateDiagonalFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_0_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_0_9_" class="arrow" onclick="toggleFolder('0_8_1_0_9_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorUpdateOffDiagonalFunc.html" target="_self">TensorUpdateOffDiagonalFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_1_0_9_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1detail_1_1TensorUpdateOffDiagonalFunc_1_1Params.html" target="_self">Params</a></td><td class="desc">Parameters structure </td></tr>
|
||||
<tr id="row_0_8_1_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_8_1_1_" class="arrow" onclick="toggleFolder('0_8_1_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1device_1_1kernel.html" target="_self">kernel</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_1_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span id="arr_0_8_1_1_0_" class="arrow" onclick="toggleFolder('0_8_1_1_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1device_1_1kernel_1_1detail.html" target="_self">detail</a></td><td class="desc">Defines several helpers </td></tr>
|
||||
<tr id="row_0_8_1_1_0_0_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1kernel_1_1detail_1_1TensorForEachHelper.html" target="_self">TensorForEachHelper</a></td><td class="desc">Helper to perform for-each operation </td></tr>
|
||||
<tr id="row_0_8_1_1_0_1_" style="display:none;"><td class="entry"><span style="width:96px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1kernel_1_1detail_1_1TensorForEachHelper_3_01Func_00_01Rank_00_010_01_4.html" target="_self">TensorForEachHelper< Func, Rank, 0 ></a></td><td class="desc">Helper to perform for-each operation </td></tr>
|
||||
<tr id="row_0_8_1_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_8_1_2_" class="arrow" onclick="toggleFolder('0_8_1_2_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1device_1_1thread.html" target="_self">thread</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_2_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1thread_1_1Gemm.html" target="_self">Gemm</a></td><td class="desc">Thread-level blocked general matrix product </td></tr>
|
||||
<tr id="row_0_8_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1BlockForEach.html" target="_self">BlockForEach</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1Gemm.html" target="_self">Gemm</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1Gemm_3_01ElementA_00_01LayoutA_00_01ElementB_00_01Layout4e016ab7cfc644acd7cb4ae770339773.html" target="_self">Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAdd ></a></td><td class="desc">Partial specialization for multiply-add </td></tr>
|
||||
<tr id="row_0_8_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1Gemm_3_01ElementA_00_01LayoutA_00_01ElementB_00_01Layout30b72addd464a2ca4a26785cbfd77a8e.html" target="_self">Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAddSaturate ></a></td><td class="desc">Partial specialization for multiply-add-saturate </td></tr>
|
||||
<tr id="row_0_8_1_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1Gemm_3_01ElementA_00_01LayoutA_00_01ElementB_00_01Layout660562b232f408218828ca5915b7e73a.html" target="_self">Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpXorPopc ></a></td><td class="desc">Partial specialization for XOR-popc </td></tr>
|
||||
<tr id="row_0_8_1_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1TensorDiagonalForEach.html" target="_self">TensorDiagonalForEach</a></td><td class="desc">Launches a kernel calling a functor for each element along a tensor's diagonal </td></tr>
|
||||
<tr id="row_0_8_1_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1device_1_1TensorForEach.html" target="_self">TensorForEach</a></td><td class="desc">Launches a kernel calling a functor for each element in a tensor's index space </td></tr>
|
||||
<tr id="row_0_8_2_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_8_2_" class="arrow" onclick="toggleFolder('0_8_2_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1host.html" target="_self">host</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_2_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_8_2_0_" class="arrow" onclick="toggleFolder('0_8_2_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1reference_1_1host_1_1detail.html" target="_self">detail</a></td><td class="desc">Defines several helpers </td></tr>
|
||||
<tr id="row_0_8_2_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1RandomGaussianFunc.html" target="_self">RandomGaussianFunc</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_2_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1RandomGaussianFunc_3_01complex_3_01Element_01_4_01_4.html" target="_self">RandomGaussianFunc< complex< Element > ></a></td><td class="desc">Partial specialization for initializing a complex value </td></tr>
|
||||
<tr id="row_0_8_2_0_2_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1RandomUniformFunc.html" target="_self">RandomUniformFunc</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_2_0_3_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1RandomUniformFunc_3_01complex_3_01Element_01_4_01_4.html" target="_self">RandomUniformFunc< complex< Element > ></a></td><td class="desc">Partial specialization for initializing a complex value </td></tr>
|
||||
<tr id="row_0_8_2_0_4_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorContainsFunc.html" target="_self">TensorContainsFunc</a></td><td class="desc">< Layout function </td></tr>
|
||||
<tr id="row_0_8_2_0_5_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorCopyIf.html" target="_self">TensorCopyIf</a></td><td class="desc">Helper to conditionally copy between tensor views </td></tr>
|
||||
<tr id="row_0_8_2_0_6_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorEqualsFunc.html" target="_self">TensorEqualsFunc</a></td><td class="desc">< Layout function </td></tr>
|
||||
<tr id="row_0_8_2_0_7_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFillDiagonalFunc.html" target="_self">TensorFillDiagonalFunc</a></td><td class="desc">< Layout function </td></tr>
|
||||
<tr id="row_0_8_2_0_8_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFillFunc.html" target="_self">TensorFillFunc</a></td><td class="desc">< Layout function </td></tr>
|
||||
<tr id="row_0_8_2_0_9_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFillGaussianFunc.html" target="_self">TensorFillGaussianFunc</a></td><td class="desc">Computes a random Gaussian distribution </td></tr>
|
||||
<tr id="row_0_8_2_0_10_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFillLinearFunc.html" target="_self">TensorFillLinearFunc</a></td><td class="desc">< Layout function </td></tr>
|
||||
<tr id="row_0_8_2_0_11_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFillRandomUniformFunc.html" target="_self">TensorFillRandomUniformFunc</a></td><td class="desc">Computes a random uniform distribution </td></tr>
|
||||
<tr id="row_0_8_2_0_12_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorForEachHelper.html" target="_self">TensorForEachHelper</a></td><td class="desc">Helper to perform for-each operation </td></tr>
|
||||
<tr id="row_0_8_2_0_13_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorForEachHelper_3_01Func_00_01Rank_00_010_01_4.html" target="_self">TensorForEachHelper< Func, Rank, 0 ></a></td><td class="desc">Helper to perform for-each operation </td></tr>
|
||||
<tr id="row_0_8_2_0_14_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFuncBinaryOp.html" target="_self">TensorFuncBinaryOp</a></td><td class="desc">Helper to apply a binary operator in place </td></tr>
|
||||
<tr id="row_0_8_2_0_15_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TensorUpdateOffDiagonalFunc.html" target="_self">TensorUpdateOffDiagonalFunc</a></td><td class="desc">< Layout function </td></tr>
|
||||
<tr id="row_0_8_2_0_16_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1detail_1_1TrivialConvert.html" target="_self">TrivialConvert</a></td><td class="desc">Helper to convert between types </td></tr>
|
||||
<tr id="row_0_8_2_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1BlockForEach.html" target="_self">BlockForEach</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_2_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1Gemm.html" target="_self">Gemm</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_8_2_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1Gemm_3_01ElementA_00_01LayoutA_00_01ElementB_00_01LayoutB_193dd3a37f00deff1e5dcd7c310afb1f.html" target="_self">Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAdd ></a></td><td class="desc">Partial specialization for multiply-add </td></tr>
|
||||
<tr id="row_0_8_2_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1Gemm_3_01ElementA_00_01LayoutA_00_01ElementB_00_01LayoutB_55729eac7dbd6bf311ea36f680e83e93.html" target="_self">Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAddSaturate ></a></td><td class="desc">Partial specialization for multiply-add-saturate </td></tr>
|
||||
<tr id="row_0_8_2_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1reference_1_1host_1_1Gemm_3_01ElementA_00_01LayoutA_00_01ElementB_00_01LayoutB_4f3f32c4b336238abfd741e87bfced46.html" target="_self">Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpXorPopc ></a></td><td class="desc">Partial specialization for XOR-popc </td></tr>
|
||||
<tr id="row_0_9_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_9_" class="arrow" onclick="toggleFolder('0_9_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1thread.html" target="_self">thread</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_9_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1thread_1_1Matrix.html" target="_self">Matrix</a></td><td class="desc">Per-thread matrix object storing a packed matrix </td></tr>
|
||||
<tr id="row_0_10_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_10_" class="arrow" onclick="toggleFolder('0_10_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1transform.html" target="_self">transform</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_0_" class="arrow" onclick="toggleFolder('0_10_0_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1transform_1_1thread.html" target="_self">thread</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1thread_1_1Transpose.html" target="_self">Transpose</a></td><td class="desc">Transforms a fragment by doing a transpose </td></tr>
|
||||
<tr id="row_0_10_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1thread_1_1Transpose_3_01ElementCount___00_01layout_1_1PitchLinearS99f8e05faf0bb5ed48a0154afe740d81.html" target="_self">Transpose< ElementCount_, layout::PitchLinearShape< 4, 4 >, int8_t ></a></td><td class="desc">Specialization for int8_t 4x4 transpose </td></tr>
|
||||
<tr id="row_0_10_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_1_" class="arrow" onclick="toggleFolder('0_10_1_')">►</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacecutlass_1_1transform_1_1threadblock.html" target="_self">threadblock</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator.html" target="_self">PredicatedTileAccessIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile.html" target="_self">PredicatedTileAccessIterator2dThreadTile</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_2_" class="arrow" onclick="toggleFolder('0_10_1_2_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__da632779aba661c0f4cfaaa78126b771.html" target="_self">PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_2_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__18e9cf25bb3b8edfaad595241a6dc2d7.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_3_" class="arrow" onclick="toggleFolder('0_10_1_3_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__1790abaa54a01f277d75766d5882fec8.html" target="_self">PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_3_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__8ccc62d47a092afc8bee32ffe9d1e4ba.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_4_" class="arrow" onclick="toggleFolder('0_10_1_4_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__7327fa15996bcb8502cdfcc192350fe1.html" target="_self">PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_4_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__a56cbccec33ee916292ad9d068474609.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_5_" class="arrow" onclick="toggleFolder('0_10_1_5_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen89c687c583745a73cb485041911a4c4e.html" target="_self">PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_5_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemenc07b5ec72f83e782121ac629288d61fe.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_6_" class="arrow" onclick="toggleFolder('0_10_1_6_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemenab63a1e105bf37f6371516cb9e2c5a7a.html" target="_self">PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_6_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemena9b06926a275b569ee9f7f142604b997.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_7_" class="arrow" onclick="toggleFolder('0_10_1_7_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen784a0e9da3f55064c47e5613791f51f7.html" target="_self">PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_7_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen41e459f664d17473570cf22fb616845f.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_8_" class="arrow" onclick="toggleFolder('0_10_1_8_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen9838736ad62fae54213fbaf722a989ab.html" target="_self">PredicatedTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_8_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen44ce348364e78f5a56fa0c2cef6af930.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_9_" class="arrow" onclick="toggleFolder('0_10_1_9_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen809793e785fb4211888c6b4e5dcfcb39.html" target="_self">PredicatedTileAccessIterator< Shape_, Element_, layout::RowMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_9_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen058417e2cdd86f3cd6ad5458581571c8.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator.html" target="_self">PredicatedTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile.html" target="_self">PredicatedTileIterator2dThreadTile</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_12_" class="arrow" onclick="toggleFolder('0_10_1_12_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_0165b39a630d10785a3558406f9adb99b9.html" target="_self">PredicatedTileIterator2dThreadTile< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Transpose_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_12_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_01e11ed7192af5d7ad1bce5641fa13112e.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_13_" class="arrow" onclick="toggleFolder('0_10_1_13_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_017a517f3c73efd795ab05059cc9b111e1.html" target="_self">PredicatedTileIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Transpose_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_13_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_0b878062cc0cd214bf7e17d74ff17e246.html" target="_self">AccessType</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_13_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_0145ef045e8f7d57dc718098adcb00cf3d.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_14_" class="arrow" onclick="toggleFolder('0_10_1_14_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_013671177d6219bfeb0e1b4dc4c1b5bf11.html" target="_self">PredicatedTileIterator2dThreadTile< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Transpose_ ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_14_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator2dThreadTile_3_01Shape___00_0102e766863c6ac9ec2063a02c4803eecb.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_15_" class="arrow" onclick="toggleFolder('0_10_1_15_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___0068b3e874b5d93d11f0fa902c7f1d11d9.html" target="_self">PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_15_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___00a6b756b1bcfbb35fe4a3e68ff074e380.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_16_" class="arrow" onclick="toggleFolder('0_10_1_16_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___00f6b3a9dfab5e7c72d5233f7e5e6e3b9b.html" target="_self">PredicatedTileIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_16_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___00ebd1a63351e1085d0b718582ec7b06c8.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_17_" class="arrow" onclick="toggleFolder('0_10_1_17_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___00e7c2c404e7aedfe60ad56bb5571306a1.html" target="_self">PredicatedTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_17_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___006a5f2f7a8271031e6cdc5daa5441f2af.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_18_" class="arrow" onclick="toggleFolder('0_10_1_18_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___0041ea81994f8af0d4d071fdb9e66b5ff0.html" target="_self">PredicatedTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, AccessSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_18_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___004d0f9b5e19c29acc17bcdc360dafebbd.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_19_" class="arrow" onclick="toggleFolder('0_10_1_19_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___00d670f969180a8d182dffb356ebcc957e.html" target="_self">PredicatedTileIterator< Shape_, Element_, layout::RowMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessSize ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_19_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileIterator_3_01Shape___00_01Element___009fd89f6dad84238fd7d63df0a0c0364f.html" target="_self">Params</a></td><td class="desc">Parameters object is precomputed state and is host-constructible </td></tr>
|
||||
<tr id="row_0_10_1_20_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator.html" target="_self">RegularTileAccessIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__eb7d20f8b9d69e0ae5e7ef51dc480867.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_22_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__2c1476eaf582bfe972793e17babfe985.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_23_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__a3c11cf1f00ef7a1efb8389ac6e4c6e0.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__0855e9d9ab619202d2397180c1e4c4a5.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_25_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__f04332958a49a47d6fb2b25201764630.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_26_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__6baada077236f1a368c61c5e11b45b72.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::RowMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_27_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__0184b7188941788a96624510a4b2f876.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_28_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_28_" class="arrow" onclick="toggleFolder('0_10_1_28_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__ebf4714349612673e8b6609b763eeb6f.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::TensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_28_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element_0a9491607d11be8e1780e79ad711aa42.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_1_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_29_" class="arrow" onclick="toggleFolder('0_10_1_29_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element__e9a9e0f4286f652f55eb9b863b21effe.html" target="_self">RegularTileAccessIterator< Shape_, Element_, layout::TensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_29_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileAccessIterator_3_01Shape___00_01Element_3be8b96d170d886f39b6b30acab65e7a.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_1_30_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator.html" target="_self">RegularTileIterator</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_31_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator2dThreadTile.html" target="_self">RegularTileIterator2dThreadTile</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_32_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator2dThreadTile_3_01Shape___00_01Eleb60d066756d1c18f05fceee6a27bdb8a.html" target="_self">RegularTileIterator2dThreadTile< Shape_, Element_, layout::ColumnMajorInterleaved< 4 >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc">Regular tile iterator specialized for interleaved layout + 2d thread-tiled threadmapping </td></tr>
|
||||
<tr id="row_0_10_1_33_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator2dThreadTile_3_01Shape___00_01Ele76ed82829532ae1c17f4c78158f036c7.html" target="_self">RegularTileIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc">Regular tile iterator specialized for pitch-linear + 2d thread-tiled threadmapping </td></tr>
|
||||
<tr id="row_0_10_1_34_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator2dThreadTile_3_01Shape___00_01Ele654c8f6161ae5340f040397a4e2e045c.html" target="_self">RegularTileIterator2dThreadTile< Shape_, Element_, layout::RowMajorInterleaved< 4 >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc">Regular tile iterator specialized for interleaved layout + 2d thread-tiled threadmapping </td></tr>
|
||||
<tr id="row_0_10_1_35_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_011d3637dbd8bc58bcb020b51bf57fbfc0.html" target="_self">RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc">Regular tile iterator specialized for pitch-linear </td></tr>
|
||||
<tr id="row_0_10_1_36_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_017982f81d4ef592e19c8427de2ea933a3.html" target="_self">RegularTileIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_37_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_010889a732373c350de9b9a9f6c13cd761.html" target="_self">RegularTileIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_38_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01187f8574e1fe9d7d5e8fbf09bd834bf0.html" target="_self">RegularTileIterator< Shape_, Element_, layout::ColumnMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_39_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01793f74bfd8f116a827948ab01a37349a.html" target="_self">RegularTileIterator< Shape_, Element_, layout::ColumnMajorVoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_40_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01bd31b3810c1fedf2e7e5959ff92b5d3d.html" target="_self">RegularTileIterator< Shape_, Element_, layout::ColumnMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Shape_::kRow >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_41_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_0184a89653916f5d51ab59d1b386989a17.html" target="_self">RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc">Regular tile iterator specialized for pitch-linear </td></tr>
|
||||
<tr id="row_0_10_1_42_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_0149454d361ea5885cf5166a920b5145df.html" target="_self">RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc">Regular tile iterator specialized for pitch-linear </td></tr>
|
||||
<tr id="row_0_10_1_43_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01c20d35180520077a5a09b1e33543c1a5.html" target="_self">RegularTileIterator< Shape_, Element_, layout::RowMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_44_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01a31b454d9c930525c1e9ca406a514f40.html" target="_self">RegularTileIterator< Shape_, Element_, layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_45_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_0104ad31bd559a88cc418ae1cab7492ed5.html" target="_self">RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_46_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01f6f6511b5033cad31083644ac69c54d8.html" target="_self">RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_47_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01b3fa5720e807697de61b9f937b269cd0.html" target="_self">RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Shape_::kColumn >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_48_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_48_" class="arrow" onclick="toggleFolder('0_10_1_48_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01efd5013a2503d6567e2bf6b40c97360c.html" target="_self">RegularTileIterator< Shape_, Element_, layout::TensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_48_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_052caec9d5bceeb59b9a13cb3338ce64d.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_1_49_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_49_" class="arrow" onclick="toggleFolder('0_10_1_49_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_0197fef2242a3454a7d1cebe61aee28b43.html" target="_self">RegularTileIterator< Shape_, Element_, layout::TensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_49_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_039093927f4b1ee61538c569bf1ae4efd.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_1_50_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_50_" class="arrow" onclick="toggleFolder('0_10_1_50_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01a75d2cd74e722d6ad6a3b41aabfd432d.html" target="_self">RegularTileIterator< Shape_, Element_, layout::VoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_50_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_02d305cfb0b55c6fb236a52cf2240651e.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_1_51_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_51_" class="arrow" onclick="toggleFolder('0_10_1_51_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01f96bbeb63e6d4ce4a2551279de3a9f0e.html" target="_self">RegularTileIterator< Shape_, Element_, layout::VoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_51_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_032f88d1be8b209e44a4815c707ba35bb.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_1_52_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span id="arr_0_10_1_52_" class="arrow" onclick="toggleFolder('0_10_1_52_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_01dbd6b8468d5bd787308d2f615a24d123.html" target="_self">RegularTileIterator< Shape_, Element_, layout::VoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Shape_::kContiguous >, AdvanceRank, ThreadMap_, Alignment ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_1_52_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape___00_01Element___00_0390833403016f5d817416e20828845df.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection </td></tr>
|
||||
<tr id="row_0_10_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinear2DThreadTileStripminedThreadMap.html" target="_self">PitchLinear2DThreadTileStripminedThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_3_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_3_" class="arrow" onclick="toggleFolder('0_10_3_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinear2DThreadTileStripminedThreadMap_3_01Shape___00_01Thread0082c3467229b12cc9dd996283ee7160.html" target="_self">PitchLinear2DThreadTileStripminedThreadMap< Shape_, Threads, cutlass::layout::PitchLinearShape< 4, 4 > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_3_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinear2DThreadTileStripminedThreadMap_3_01Shape___00_01Thread896c01a3c466da1bf392e0cdfced4d53.html" target="_self">Detail</a></td><td class="desc">Internal implementation details </td></tr>
|
||||
<tr id="row_0_10_4_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_4_" class="arrow" onclick="toggleFolder('0_10_4_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearStripminedThreadMap.html" target="_self">PitchLinearStripminedThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_4_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearStripminedThreadMap_1_1Detail.html" target="_self">Detail</a></td><td class="desc">Internal implementation details </td></tr>
|
||||
<tr id="row_0_10_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearTilePolicyStripminedThreadContiguous.html" target="_self">PitchLinearTilePolicyStripminedThreadContiguous</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearTilePolicyStripminedThreadStrided.html" target="_self">PitchLinearTilePolicyStripminedThreadStrided</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_7_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_7_" class="arrow" onclick="toggleFolder('0_10_7_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearWarpRakedThreadMap.html" target="_self">PitchLinearWarpRakedThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_7_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearWarpRakedThreadMap_1_1Detail.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection Iterations along each dimension (concept: PitchLinearShape) </td></tr>
|
||||
<tr id="row_0_10_8_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_8_" class="arrow" onclick="toggleFolder('0_10_8_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearWarpStripedThreadMap.html" target="_self">PitchLinearWarpStripedThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_8_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1PitchLinearWarpStripedThreadMap_1_1Detail.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection Iterations along each dimension (concept: PitchLinearShape) </td></tr>
|
||||
<tr id="row_0_10_9_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span id="arr_0_10_9_" class="arrow" onclick="toggleFolder('0_10_9_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1TransposePitchLinearThreadMap.html" target="_self">TransposePitchLinearThreadMap</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_10_9_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1TransposePitchLinearThreadMap_1_1Detail.html" target="_self">Detail</a></td><td class="desc">Internal details made public to facilitate introspection Iterations along each dimension (concept: PitchLinearShape) </td></tr>
|
||||
<tr id="row_0_10_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1TransposePitchLinearThreadMap2DThreadTile.html" target="_self">TransposePitchLinearThreadMap2DThreadTile</a></td><td class="desc">Thread Mapping a 2D threadtiled mapping as a transposed Pitchlinear2DThreadTile mapping </td></tr>
|
||||
<tr id="row_0_10_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1transform_1_1TransposePitchLinearThreadMapSimt.html" target="_self">TransposePitchLinearThreadMapSimt</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_11_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1AlignedArray.html" target="_self">AlignedArray</a></td><td class="desc">Aligned array type </td></tr>
|
||||
<tr id="row_0_12_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1AlignedBuffer.html" target="_self">AlignedBuffer</a></td><td class="desc">Modifies semantics of cutlass::Array<> to provide guaranteed alignment </td></tr>
|
||||
<tr id="row_0_13_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_13_" class="arrow" onclick="toggleFolder('0_13_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html" target="_self">Array< T, N, false ></a></td><td class="desc">Statically sized array for any data type </td></tr>
|
||||
<tr id="row_0_13_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html" target="_self">const_iterator</a></td><td class="desc">Bidirectional constant iterator over elements </td></tr>
|
||||
<tr id="row_0_13_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html" target="_self">const_reference</a></td><td class="desc">Reference object extracts sub-byte items </td></tr>
|
||||
<tr id="row_0_13_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html" target="_self">const_reverse_iterator</a></td><td class="desc">Bidirectional constant iterator over elements </td></tr>
|
||||
<tr id="row_0_13_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html" target="_self">iterator</a></td><td class="desc">Bidirectional iterator over elements </td></tr>
|
||||
<tr id="row_0_13_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html" target="_self">reference</a></td><td class="desc">Reference object inserts or extracts sub-byte items </td></tr>
|
||||
<tr id="row_0_13_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html" target="_self">reverse_iterator</a></td><td class="desc">Bidirectional iterator over elements </td></tr>
|
||||
<tr id="row_0_14_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_14_" class="arrow" onclick="toggleFolder('0_14_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html" target="_self">Array< T, N, true ></a></td><td class="desc">Statically sized array for any data type </td></tr>
|
||||
<tr id="row_0_14_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html" target="_self">const_iterator</a></td><td class="desc">Bidirectional constant iterator over elements </td></tr>
|
||||
<tr id="row_0_14_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__reverse__iterator.html" target="_self">const_reverse_iterator</a></td><td class="desc">Bidirectional constant iterator over elements </td></tr>
|
||||
<tr id="row_0_14_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1iterator.html" target="_self">iterator</a></td><td class="desc">Bidirectional iterator over elements </td></tr>
|
||||
<tr id="row_0_14_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1reverse__iterator.html" target="_self">reverse_iterator</a></td><td class="desc">Bidirectional iterator over elements </td></tr>
|
||||
<tr id="row_0_15_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1CommandLine.html" target="_self">CommandLine</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_16_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1complex.html" target="_self">complex</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_17_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1ConstSubbyteReference.html" target="_self">ConstSubbyteReference</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_18_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1Coord.html" target="_self">Coord</a></td><td class="desc">Statically-sized array specifying Coords within a tensor </td></tr>
|
||||
<tr id="row_0_19_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1cuda__exception.html" target="_self">cuda_exception</a></td><td class="desc">C++ exception wrapper for CUDA <code>cudaError_t</code> </td></tr>
|
||||
<tr id="row_0_20_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1Distribution.html" target="_self">Distribution</a></td><td class="desc"><a class="el" href="structcutlass_1_1Distribution.html" title="Distribution type. ">Distribution</a> type </td></tr>
|
||||
<tr id="row_0_21_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1divide__assert.html" target="_self">divide_assert</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_22_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1divides.html" target="_self">divides</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_23_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1divides_3_01Array_3_01half__t_00_01N_01_4_01_4.html" target="_self">divides< Array< half_t, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_24_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1divides_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">divides< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_25_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1FloatType.html" target="_self">FloatType</a></td><td class="desc">Defines a floating-point type based on the number of exponent and mantissa bits </td></tr>
|
||||
<tr id="row_0_26_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1FloatType_3_0111_00_0152_01_4.html" target="_self">FloatType< 11, 52 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_27_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1FloatType_3_015_00_0110_01_4.html" target="_self">FloatType< 5, 10 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_28_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1FloatType_3_018_00_0123_01_4.html" target="_self">FloatType< 8, 23 ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_29_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1half__t.html" target="_self">half_t</a></td><td class="desc">IEEE half-precision floating-point type </td></tr>
|
||||
<tr id="row_0_30_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1HostTensor.html" target="_self">HostTensor</a></td><td class="desc">Host tensor </td></tr>
|
||||
<tr id="row_0_31_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1IdentityTensorLayout.html" target="_self">IdentityTensorLayout</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_32_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1integer__subbyte.html" target="_self">integer_subbyte</a></td><td class="desc">4-bit signed integer type </td></tr>
|
||||
<tr id="row_0_33_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType.html" target="_self">IntegerType</a></td><td class="desc">Defines integers based on size and whether they are signed </td></tr>
|
||||
<tr id="row_0_34_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_011_00_01false_01_4.html" target="_self">IntegerType< 1, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_35_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_011_00_01true_01_4.html" target="_self">IntegerType< 1, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_36_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_0116_00_01false_01_4.html" target="_self">IntegerType< 16, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_37_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_0116_00_01true_01_4.html" target="_self">IntegerType< 16, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_38_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_0132_00_01false_01_4.html" target="_self">IntegerType< 32, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_39_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_0132_00_01true_01_4.html" target="_self">IntegerType< 32, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_40_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_014_00_01false_01_4.html" target="_self">IntegerType< 4, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_41_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_014_00_01true_01_4.html" target="_self">IntegerType< 4, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_42_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_0164_00_01false_01_4.html" target="_self">IntegerType< 64, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_43_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_0164_00_01true_01_4.html" target="_self">IntegerType< 64, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_44_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_018_00_01false_01_4.html" target="_self">IntegerType< 8, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_45_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1IntegerType_3_018_00_01true_01_4.html" target="_self">IntegerType< 8, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_46_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1is__pow2.html" target="_self">is_pow2</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_47_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1KernelLaunchConfiguration.html" target="_self">KernelLaunchConfiguration</a></td><td class="desc">Structure containing the basic launch configuration of a CUDA kernel </td></tr>
|
||||
<tr id="row_0_48_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1log2__down.html" target="_self">log2_down</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_49_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html" target="_self">log2_down< N, 1, Count ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_50_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1log2__up.html" target="_self">log2_up</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_51_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html" target="_self">log2_up< N, 1, Count ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_52_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1MatrixCoord.html" target="_self">MatrixCoord</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_53_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1MatrixShape.html" target="_self">MatrixShape</a></td><td class="desc">Describes the size of a matrix tile </td></tr>
|
||||
<tr id="row_0_54_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1Max.html" target="_self">Max</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_55_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1maximum.html" target="_self">maximum</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_56_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1maximum_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">maximum< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_57_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1maximum_3_01float_01_4.html" target="_self">maximum< float ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_58_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1Min.html" target="_self">Min</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_59_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1minimum.html" target="_self">minimum</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_60_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1minimum_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">minimum< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_61_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1minimum_3_01float_01_4.html" target="_self">minimum< float ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_62_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1minus.html" target="_self">minus</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_63_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1minus_3_01Array_3_01half__t_00_01N_01_4_01_4.html" target="_self">minus< Array< half_t, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_64_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1minus_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">minus< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_65_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiplies.html" target="_self">multiplies</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_66_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiplies_3_01Array_3_01half__t_00_01N_01_4_01_4.html" target="_self">multiplies< Array< half_t, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_67_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiplies_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">multiplies< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_68_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiply__add.html" target="_self">multiply_add</a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_0_69_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiply__add_3_01Array_3_01half__t_00_01N_01_4_00_01Array_3_01half__t_00_01N_01adaeadb27c0e4439444709c0eb30963.html" target="_self">multiply_add< Array< half_t, N >, Array< half_t, N >, Array< half_t, N > ></a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_0_70_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiply__add_3_01Array_3_01T_00_01N_01_4_00_01Array_3_01T_00_01N_01_4_00_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">multiply_add< Array< T, N >, Array< T, N >, Array< T, N > ></a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_0_71_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiply__add_3_01complex_3_01T_01_4_00_01complex_3_01T_01_4_00_01complex_3_01T_01_4_01_4.html" target="_self">multiply_add< complex< T >, complex< T >, complex< T > ></a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_0_72_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiply__add_3_01complex_3_01T_01_4_00_01T_00_01complex_3_01T_01_4_01_4.html" target="_self">multiply_add< complex< T >, T, complex< T > ></a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_0_73_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1multiply__add_3_01T_00_01complex_3_01T_01_4_00_01complex_3_01T_01_4_01_4.html" target="_self">multiply_add< T, complex< T >, complex< T > ></a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_0_74_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1negate.html" target="_self">negate</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_75_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1negate_3_01Array_3_01half__t_00_01N_01_4_01_4.html" target="_self">negate< Array< half_t, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_76_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1negate_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">negate< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_77_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericArrayConverter.html" target="_self">NumericArrayConverter</a></td><td class="desc">Conversion operator for Array </td></tr>
|
||||
<tr id="row_0_78_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericArrayConverter_3_01float_00_01half__t_00_012_00_01Round_01_4.html" target="_self">NumericArrayConverter< float, half_t, 2, Round ></a></td><td class="desc">Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest </td></tr>
|
||||
<tr id="row_0_79_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericArrayConverter_3_01float_00_01half__t_00_01N_00_01Round_01_4.html" target="_self">NumericArrayConverter< float, half_t, N, Round ></a></td><td class="desc">Partial specialization for Array<half> <= Array<float> </td></tr>
|
||||
<tr id="row_0_80_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html" target="_self">NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest ></a></td><td class="desc">Partial specialization for Array<half, 2> <= Array<float, 2>, round to nearest </td></tr>
|
||||
<tr id="row_0_81_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_01N_00_01Round_01_4.html" target="_self">NumericArrayConverter< half_t, float, N, Round ></a></td><td class="desc">Partial specialization for Array<half> <= Array<float> </td></tr>
|
||||
<tr id="row_0_82_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverter.html" target="_self">NumericConverter</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_83_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverter_3_01float_00_01half__t_00_01Round_01_4.html" target="_self">NumericConverter< float, half_t, Round ></a></td><td class="desc">Partial specialization for float <= <a class="el" href="structcutlass_1_1half__t.html" title="IEEE half-precision floating-point type. ">half_t</a> </td></tr>
|
||||
<tr id="row_0_84_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html" target="_self">NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest ></a></td><td class="desc">Specialization for round-to-nearest </td></tr>
|
||||
<tr id="row_0_85_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__toward__zero_01_4.html" target="_self">NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero ></a></td><td class="desc">Specialization for round-toward-zero </td></tr>
|
||||
<tr id="row_0_86_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverter_3_01int8__t_00_01float_00_01Round_01_4.html" target="_self">NumericConverter< int8_t, float, Round ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_87_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverter_3_01T_00_01T_00_01Round_01_4.html" target="_self">NumericConverter< T, T, Round ></a></td><td class="desc">Partial specialization for float <= <a class="el" href="structcutlass_1_1half__t.html" title="IEEE half-precision floating-point type. ">half_t</a> </td></tr>
|
||||
<tr id="row_0_88_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1NumericConverterClamp.html" target="_self">NumericConverterClamp</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_89_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1plus.html" target="_self">plus</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_90_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1plus_3_01Array_3_01half__t_00_01N_01_4_01_4.html" target="_self">plus< Array< half_t, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_91_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1plus_3_01Array_3_01T_00_01N_01_4_01_4.html" target="_self">plus< Array< T, N > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_92_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_92_" class="arrow" onclick="toggleFolder('0_92_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1PredicateVector.html" target="_self">PredicateVector</a></td><td class="desc">Statically sized array of bits implementing </td></tr>
|
||||
<tr id="row_0_92_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1PredicateVector_1_1ConstIterator.html" target="_self">ConstIterator</a></td><td class="desc">An iterator implementing <a class="el" href="group__predicate__iterator__concept.html">Predicate Iterator Concept</a> enabling sequential read and write access to predicates </td></tr>
|
||||
<tr id="row_0_92_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1PredicateVector_1_1Iterator.html" target="_self">Iterator</a></td><td class="desc">An iterator implementing <a class="el" href="group__predicate__iterator__concept.html">Predicate Iterator Concept</a> enabling sequential read and write access to predicates </td></tr>
|
||||
<tr id="row_0_92_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1PredicateVector_1_1TrivialIterator.html" target="_self">TrivialIterator</a></td><td class="desc"><a class="el" href="classcutlass_1_1PredicateVector_1_1Iterator.html" title="An iterator implementing Predicate Iterator Concept enabling sequential read and write access to pred...">Iterator</a> that always returns true </td></tr>
|
||||
<tr id="row_0_93_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1RealType.html" target="_self">RealType</a></td><td class="desc">Used to determine the real-valued underlying type of a numeric type T </td></tr>
|
||||
<tr id="row_0_94_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1RealType_3_01complex_3_01T_01_4_01_4.html" target="_self">RealType< complex< T > ></a></td><td class="desc">Partial specialization for complex-valued type </td></tr>
|
||||
<tr id="row_0_95_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1ReferenceFactory.html" target="_self">ReferenceFactory</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_96_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1ReferenceFactory_3_01Element_00_01false_01_4.html" target="_self">ReferenceFactory< Element, false ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_97_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1ReferenceFactory_3_01Element_00_01true_01_4.html" target="_self">ReferenceFactory< Element, true ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_98_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1ScalarIO.html" target="_self">ScalarIO</a></td><td class="desc">Helper to enable formatted printing of CUTLASS scalar types to an ostream </td></tr>
|
||||
<tr id="row_0_99_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1Semaphore.html" target="_self">Semaphore</a></td><td class="desc">CTA-wide semaphore for inter-CTA synchronization </td></tr>
|
||||
<tr id="row_0_100_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sizeof__bits.html" target="_self">sizeof_bits</a></td><td class="desc">Defines the size of an element in bits </td></tr>
|
||||
<tr id="row_0_101_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sizeof__bits_3_01Array_3_01T_00_01N_00_01RegisterSized_01_4_01_4.html" target="_self">sizeof_bits< Array< T, N, RegisterSized > ></a></td><td class="desc">Statically sized array for any data type </td></tr>
|
||||
<tr id="row_0_102_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sizeof__bits_3_01bin1__t_01_4.html" target="_self">sizeof_bits< bin1_t ></a></td><td class="desc">Defines the size of an element in bits - specialized for bin1_t </td></tr>
|
||||
<tr id="row_0_103_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sizeof__bits_3_01int4b__t_01_4.html" target="_self">sizeof_bits< int4b_t ></a></td><td class="desc">Defines the size of an element in bits - specialized for int4b_t </td></tr>
|
||||
<tr id="row_0_104_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sizeof__bits_3_01uint1b__t_01_4.html" target="_self">sizeof_bits< uint1b_t ></a></td><td class="desc">Defines the size of an element in bits - specialized for uint1b_t </td></tr>
|
||||
<tr id="row_0_105_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sizeof__bits_3_01uint4b__t_01_4.html" target="_self">sizeof_bits< uint4b_t ></a></td><td class="desc">Defines the size of an element in bits - specialized for uint4b_t </td></tr>
|
||||
<tr id="row_0_106_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1sqrt__est.html" target="_self">sqrt_est</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_107_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1SubbyteReference.html" target="_self">SubbyteReference</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_108_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1Tensor4DCoord.html" target="_self">Tensor4DCoord</a></td><td class="desc">Defines a canonical 4D coordinate used by tensor operations </td></tr>
|
||||
<tr id="row_0_109_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1TensorRef.html" target="_self">TensorRef</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_110_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="classcutlass_1_1TensorView.html" target="_self">TensorView</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_111_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits.html" target="_self">TypeTraits</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_112_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span id="arr_0_112_" class="arrow" onclick="toggleFolder('0_112_')">►</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01complex_3_01double_01_4_01_4.html" target="_self">TypeTraits< complex< double > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_112_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01complex_3_01double_01_4_01_4_1_1integer__type.html" target="_self">integer_type</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_112_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01complex_3_01double_01_4_01_4_1_1unsigned__type.html" target="_self">unsigned_type</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_113_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01complex_3_01float_01_4_01_4.html" target="_self">TypeTraits< complex< float > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_114_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01complex_3_01half_01_4_01_4.html" target="_self">TypeTraits< complex< half > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_115_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01complex_3_01half__t_01_4_01_4.html" target="_self">TypeTraits< complex< half_t > ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_116_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01double_01_4.html" target="_self">TypeTraits< double ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_117_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01float_01_4.html" target="_self">TypeTraits< float ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_118_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01half__t_01_4.html" target="_self">TypeTraits< half_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_119_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01int_01_4.html" target="_self">TypeTraits< int ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_120_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01int64__t_01_4.html" target="_self">TypeTraits< int64_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_121_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01int8__t_01_4.html" target="_self">TypeTraits< int8_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_122_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01uint64__t_01_4.html" target="_self">TypeTraits< uint64_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_123_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01uint8__t_01_4.html" target="_self">TypeTraits< uint8_t ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_124_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1TypeTraits_3_01unsigned_01_4.html" target="_self">TypeTraits< unsigned ></a></td><td class="desc"></td></tr>
|
||||
<tr id="row_0_125_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structcutlass_1_1xor__add.html" target="_self">xor_add</a></td><td class="desc">Fused multiply-add </td></tr>
|
||||
<tr id="row_1_"><td class="entry"><span style="width:0px;display:inline-block;"> </span><span id="arr_1_" class="arrow" onclick="toggleFolder('1_')">►</span><span class="icona"><span class="icon">N</span></span><b>std</b></td><td class="desc">STL namespace </td></tr>
|
||||
<tr id="row_1_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structstd_1_1numeric__limits_3_01cutlass_1_1half__t_01_4.html" target="_self">numeric_limits< cutlass::half_t ></a></td><td class="desc">Numeric limits </td></tr>
|
||||
<tr id="row_2_" class="even"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structDebugType.html" target="_self">DebugType</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_3_"><td class="entry"><span style="width:16px;display:inline-block;"> </span><span class="icona"><span class="icon">C</span></span><a class="el" href="structDebugValue.html" target="_self">DebugValue</a></td><td class="desc"></td></tr>
|
||||
</table>
|
||||
</div><!-- directory -->
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: mma.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_048c1df36ab9c2efbb0733edba6291c9.html">arch</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">arch/mma.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Templates exposing architecture support for multiply-add operations.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="array_8h_source.html">cutlass/array.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="numeric__types_8h_source.html">cutlass/numeric_types.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="include_2cutlass_2gemm_2gemm_8h_source.html">cutlass/gemm/gemm.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="arch_2mma__sm50_8h_source.html">cutlass/arch/mma_sm50.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="arch_2mma__sm60_8h_source.html">cutlass/arch/mma_sm60.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="arch_2mma__sm61_8h_source.html">cutlass/arch/mma_sm61.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="mma__sm70_8h_source.html">cutlass/arch/mma_sm70.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="mma__sm75_8h_source.html">cutlass/arch/mma_sm75.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for arch/mma.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma_8h__incl.png" border="0" usemap="#mma_8h" alt=""/></div>
|
||||
<map name="mma_8h" id="mma_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma_8h__dep__incl.png" border="0" usemap="#mma_8hdep" alt=""/></div>
|
||||
<map name="mma_8hdep" id="mma_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="arch_2mma_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma.html">cutlass::arch::Mma< Shape_, kThreads_, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01ElementAb6e65b2cf5ede7f41cb070a767158dee.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation - specialized for 1x1x1x1 matrix multiply operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01ElementAb6e65b2cf5ede7f41cb070a767158dee.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1arch"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1arch.html">cutlass::arch</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
7d16b59e6ba0442b8a275a213d5da3a6
|
||||
|
|
@ -0,0 +1 @@
|
|||
d1fff3f9d55a262110aa6a456caa91e0
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,176 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: mma_sm50.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_048c1df36ab9c2efbb0733edba6291c9.html">arch</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">arch/mma_sm50.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Matrix multiply.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="arch_2mma_8h_source.html">cutlass/arch/mma.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="complex_8h_source.html">cutlass/complex.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="layout_2matrix_8h_source.html">cutlass/layout/matrix.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="include_2cutlass_2gemm_2gemm_8h_source.html">cutlass/gemm/gemm.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for arch/mma_sm50.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma__sm50_8h__incl.png" border="0" usemap="#mma__sm50_8h" alt=""/></div>
|
||||
<map name="mma__sm50_8h" id="mma__sm50_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma__sm50_8h__dep__incl.png" border="0" usemap="#mma__sm50_8hdep" alt=""/></div>
|
||||
<map name="mma__sm50_8hdep" id="mma__sm50_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="arch_2mma__sm50_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01float_004bb3fd76ca2af7b3210676fa9644d95b.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, float, LayoutA, float, LayoutB, float, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01float_004bb3fd76ca2af7b3210676fa9644d95b.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01double_0aa57e6a2e6b5da37d10688bf99419a23.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, double, LayoutA, double, LayoutB, double, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01double_0aa57e6a2e6b5da37d10688bf99419a23.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01int_00_00b2dff9ce8caad9aff5bc6a355539161.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, int, LayoutA, int, LayoutB, int, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01int_00_00b2dff9ce8caad9aff5bc6a355539161.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_76f9d24016e1b4167b16f4d7628c9546.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< float >, LayoutA, complex< float >, LayoutB, complex< float >, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_76f9d24016e1b4167b16f4d7628c9546.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_f1c9d2ee842455cd0c5b71d56108d468.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< float >, LayoutA, float, LayoutB, complex< float >, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_f1c9d2ee842455cd0c5b71d56108d468.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01float_00e3e12e263df6506b8cf06c3f4d478b8e.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, float, LayoutA, complex< float >, LayoutB, complex< float >, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01float_00e3e12e263df6506b8cf06c3f4d478b8e.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_30fa42e1ad201df010637cd22fc070a1.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< double >, LayoutA, complex< double >, LayoutB, complex< double >, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_30fa42e1ad201df010637cd22fc070a1.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_48b3a43bc03fff93a111ac01abe7e40d.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< double >, LayoutA, double, LayoutB, complex< double >, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01complex_48b3a43bc03fff93a111ac01abe7e40d.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01double_070b94670e040ed5855e5b42d5ca8a443.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, double, LayoutA, complex< double >, LayoutB, complex< double >, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01double_070b94670e040ed5855e5b42d5ca8a443.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01half__t_4f30ee91f7bb3844ff7579c68d078818.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, half_t, LayoutA, half_t, LayoutB, float, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_011_01_4_00_011_00_01half__t_4f30ee91f7bb3844ff7579c68d078818.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1arch"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1arch.html">cutlass::arch</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
988e6466c703c4e63c9a889b8c3c54b5
|
||||
|
|
@ -0,0 +1 @@
|
|||
03f1613fdffbd6e7575de0d2967d08bf
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,157 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: mma_sm60.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_048c1df36ab9c2efbb0733edba6291c9.html">arch</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">arch/mma_sm60.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Matrix multiply.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include <cuda_fp16.h></code><br />
|
||||
<code>#include "<a class="el" href="arch_2mma_8h_source.html">cutlass/arch/mma.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="layout_2matrix_8h_source.html">cutlass/layout/matrix.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for arch/mma_sm60.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma__sm60_8h__incl.png" border="0" usemap="#mma__sm60_8h" alt=""/></div>
|
||||
<map name="mma__sm60_8h" id="mma__sm60_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma__sm60_8h__dep__incl.png" border="0" usemap="#mma__sm60_8hdep" alt=""/></div>
|
||||
<map name="mma__sm60_8hdep" id="mma__sm60_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="arch_2mma__sm60_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_011_00_011_01_4_00_011_00_01half__t_8cf78649807b93684f3d431bfa34ee28.html">cutlass::arch::Mma< gemm::GemmShape< 2, 1, 1 >, 1, half_t, LayoutA, half_t, LayoutB, half_t, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_011_00_011_01_4_00_011_00_01half__t_8cf78649807b93684f3d431bfa34ee28.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_012_00_011_01_4_00_011_00_01half__t_f3dc2e59f857ada163d1e0781ea8f391.html">cutlass::arch::Mma< gemm::GemmShape< 1, 2, 1 >, 1, half_t, LayoutA, half_t, LayoutB, half_t, layout::RowMajor, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_012_00_011_01_4_00_011_00_01half__t_f3dc2e59f857ada163d1e0781ea8f391.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_012_00_011_01_4_00_011_00_01half__t_ccde11d1bbbdab3702772ce44eb9729a.html">cutlass::arch::Mma< gemm::GemmShape< 2, 2, 1 >, 1, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::ColumnMajor, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_012_00_011_01_4_00_011_00_01half__t_ccde11d1bbbdab3702772ce44eb9729a.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_012_00_011_01_4_00_011_00_01half__t_c07cc6439298fa5486a719e577be2538.html">cutlass::arch::Mma< gemm::GemmShape< 2, 2, 1 >, 1, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_012_00_012_00_011_01_4_00_011_00_01half__t_c07cc6439298fa5486a719e577be2538.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1arch"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1arch.html">cutlass::arch</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
ba69b14e3936946092854211499ae9fa
|
||||
|
|
@ -0,0 +1 @@
|
|||
e820099c55f2397639bb210d76ec4c05
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,149 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: mma_sm61.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_048c1df36ab9c2efbb0733edba6291c9.html">arch</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">arch/mma_sm61.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Matrix multiply.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="layout_2matrix_8h_source.html">cutlass/layout/matrix.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for arch/mma_sm61.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma__sm61_8h__incl.png" border="0" usemap="#mma__sm61_8h" alt=""/></div>
|
||||
<map name="mma__sm61_8h" id="mma__sm61_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_2mma__sm61_8h__dep__incl.png" border="0" usemap="#mma__sm61_8hdep" alt=""/></div>
|
||||
<map name="mma__sm61_8hdep" id="mma__sm61_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="arch_2mma__sm61_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_014_01_4_00_011_00_01int8__t_a1ef6624fc8c10126f17f4ee88283d72.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 4 >, 1, int8_t, LayoutA, int8_t, LayoutB, int, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_014_01_4_00_011_00_01int8__t_a1ef6624fc8c10126f17f4ee88283d72.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_012_01_4_00_011_00_01int16__t8c4bac365710598317a69c489f7239db.html">cutlass::arch::Mma< gemm::GemmShape< 1, 1, 2 >, 1, int16_t, layout::RowMajor, int16_t, layout::ColumnMajor, int, LayoutC, OpMultiplyAdd ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Matrix multiply-add operation. <a href="structcutlass_1_1arch_1_1Mma_3_01gemm_1_1GemmShape_3_011_00_011_00_012_01_4_00_011_00_01int16__t8c4bac365710598317a69c489f7239db.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1arch"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1arch.html">cutlass::arch</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
1faaf1631d5f0e44d6cc6c7121e6972e
|
||||
|
|
@ -0,0 +1 @@
|
|||
8cce8aef2d98c4082d68734b538253c7
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,147 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: arch.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_048c1df36ab9c2efbb0733edba6291c9.html">arch</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">arch.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Defines tags for architecture-specific configurations.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="arch_8h__dep__incl.png" border="0" usemap="#arch_8hdep" alt=""/></div>
|
||||
<map name="arch_8hdep" id="arch_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="arch_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Sm50.html">cutlass::arch::Sm50</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Sm60.html">cutlass::arch::Sm60</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Sm61.html">cutlass::arch::Sm61</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Sm70.html">cutlass::arch::Sm70</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Sm72.html">cutlass::arch::Sm72</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1arch_1_1Sm75.html">cutlass::arch::Sm75</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1arch"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1arch.html">cutlass::arch</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
9ea32ea41ab87776449ab855965480b3
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,167 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: array.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> |
|
||||
<a href="#func-members">Functions</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">array.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="cutlass_8h_source.html">cutlass/cutlass.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="numeric__types_8h_source.html">cutlass/numeric_types.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="array__subbyte_8h_source.html">cutlass/array_subbyte.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for array.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="array_8h__incl.png" border="0" usemap="#array_8h" alt=""/></div>
|
||||
<map name="array_8h" id="array_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="array_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1sizeof__bits_3_01Array_3_01T_00_01N_00_01RegisterSized_01_4_01_4.html">cutlass::sizeof_bits< Array< T, N, RegisterSized > ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Statically sized array for any data type. <a href="structcutlass_1_1sizeof__bits_3_01Array_3_01T_00_01N_00_01RegisterSized_01_4_01_4.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Statically sized array for any data type. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1iterator.html">cutlass::Array< T, N, true >::iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional constant iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1reverse__iterator.html">cutlass::Array< T, N, true >::reverse_iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1reverse__iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__reverse__iterator.html">cutlass::Array< T, N, true >::const_reverse_iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional constant iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__reverse__iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1AlignedArray.html">cutlass::AlignedArray< T, N, Alignment ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Aligned array type. <a href="classcutlass_1_1AlignedArray.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
|
||||
Functions</h2></td></tr>
|
||||
<tr class="memitem:a935aabfdc47cf03f87c67bb22533f97f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="platform_8h.html#a72f0657181cca64b44eb186b707eb380">constexpr</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html#a935aabfdc47cf03f87c67bb22533f97f">cutlass::ispow2</a> (unsigned x)</td></tr>
|
||||
<tr class="memdesc:a935aabfdc47cf03f87c67bb22533f97f"><td class="mdescLeft"> </td><td class="mdescRight">Returns true if the argument is a power of 2. <a href="namespacecutlass.html#a935aabfdc47cf03f87c67bb22533f97f">More...</a><br /></td></tr>
|
||||
<tr class="separator:a935aabfdc47cf03f87c67bb22533f97f"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ac16d8caf23537912eb02123c4bdacd14"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="platform_8h.html#a72f0657181cca64b44eb186b707eb380">constexpr</a> unsigned </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html#ac16d8caf23537912eb02123c4bdacd14">cutlass::floor_pow_2</a> (unsigned x)</td></tr>
|
||||
<tr class="memdesc:ac16d8caf23537912eb02123c4bdacd14"><td class="mdescLeft"> </td><td class="mdescRight">Returns the largest power of two not greater than the argument. <a href="namespacecutlass.html#ac16d8caf23537912eb02123c4bdacd14">More...</a><br /></td></tr>
|
||||
<tr class="separator:ac16d8caf23537912eb02123c4bdacd14"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
90c159bd7ad938ad2d6e263ea8402fe7
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,164 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: array_subbyte.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">array_subbyte.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="cutlass_8h_source.html">cutlass/cutlass.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="array_8h_source.html">cutlass/array.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="platform_8h_source.html">cutlass/platform/platform.h</a>"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for array_subbyte.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="array__subbyte_8h__incl.png" border="0" usemap="#array__subbyte_8h" alt=""/></div>
|
||||
<map name="array__subbyte_8h" id="array__subbyte_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="array__subbyte_8h__dep__incl.png" border="0" usemap="#array__subbyte_8hdep" alt=""/></div>
|
||||
<map name="array__subbyte_8hdep" id="array__subbyte_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="array__subbyte_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Statically sized array for any data type. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Reference object inserts or extracts sub-byte items. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Reference object extracts sub-byte items. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array< T, N, false >::iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional constant iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html">cutlass::Array< T, N, false >::reverse_iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html">cutlass::Array< T, N, false >::const_reverse_iterator</a></td></tr>
|
||||
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Bidirectional constant iterator over elements. <a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html#details">More...</a><br /></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
7c0288c037b6ea169ec7a3aa1015a4d4
|
||||
|
|
@ -0,0 +1 @@
|
|||
36310516438810c2a8ba31a7816cd1de
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,155 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: batched_reduction.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_ac488927e63b76ba9cb3ad9c317bbde9.html">reduction</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> |
|
||||
<a href="#func-members">Functions</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">batched_reduction.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Implements a software-pipelined efficient batched reduction. D = alpha * Reduction(A) + beta * C.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include <cuda.h></code><br />
|
||||
<code>#include "<a class="el" href="coord_8h_source.html">cutlass/coord.h</a>"</code><br />
|
||||
<code>#include "cutlass/util/platform.h"</code><br />
|
||||
<code>#include "cutlass/fragment.h"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for batched_reduction.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="batched__reduction_8h__incl.png" border="0" usemap="#batched__reduction_8h" alt=""/></div>
|
||||
<map name="batched__reduction_8h" id="batched__reduction_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
This graph shows which files directly or indirectly include this file:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="batched__reduction_8h__dep__incl.png" border="0" usemap="#batched__reduction_8hdep" alt=""/></div>
|
||||
<map name="batched__reduction_8hdep" id="batched__reduction_8hdep">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="batched__reduction_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1reduction_1_1BatchedReduction.html">cutlass::reduction::BatchedReduction< BatchedReductionTraits_ ></a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1reduction"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1reduction.html">cutlass::reduction</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
|
||||
Functions</h2></td></tr>
|
||||
<tr class="memitem:a9665e8f438a7b290d6e2eb640d93045f"><td class="memTemplParams" colspan="2">template<typename batched_reduction_ > </td></tr>
|
||||
<tr class="memitem:a9665e8f438a7b290d6e2eb640d93045f"><td class="memTemplItemLeft" align="right" valign="top">__global__ </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1reduction.html#a9665e8f438a7b290d6e2eb640d93045f">cutlass::reduction::__launch_bounds__</a> (batched_reduction_::Traits::kThreads, 1) void batched_reduction_kernel(typename batched_reduction_</td></tr>
|
||||
<tr class="separator:a9665e8f438a7b290d6e2eb640d93045f"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
2bce650f452329d669d303788cc619c8
|
||||
|
|
@ -0,0 +1 @@
|
|||
d38876c9b9d3ade81fb457e3ebf5c6fd
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,144 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: batched_reduction_traits.h File Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li class="current"><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="files.html"><span>File List</span></a></li>
|
||||
<li><a href="globals.html"><span>File Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_6baf2bb612a2f0daa69af3101ede80a1.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_ac488927e63b76ba9cb3ad9c317bbde9.html">reduction</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#nested-classes">Classes</a> |
|
||||
<a href="#namespaces">Namespaces</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">batched_reduction_traits.h File Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Defines structural properties of complete batched reduction. D = alpha * Reduction(A) + beta * C.
|
||||
<a href="#details">More...</a></p>
|
||||
<div class="textblock"><code>#include "<a class="el" href="cutlass_8h_source.html">cutlass/cutlass.h</a>"</code><br />
|
||||
<code>#include "cutlass/shape.h"</code><br />
|
||||
<code>#include "<a class="el" href="reduction_2threadblock__swizzle_8h_source.html">cutlass/reduction/threadblock_swizzle.h</a>"</code><br />
|
||||
<code>#include "<a class="el" href="batched__reduction_8h_source.html">cutlass/reduction/batched_reduction.h</a>"</code><br />
|
||||
<code>#include "cutlass/gemm/linear_scaling.h"</code><br />
|
||||
</div><div class="textblock"><div class="dynheader">
|
||||
Include dependency graph for batched_reduction_traits.h:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="batched__reduction__traits_8h__incl.png" border="0" usemap="#batched__reduction__traits_8h" alt=""/></div>
|
||||
<map name="batched__reduction__traits_8h" id="batched__reduction__traits_8h">
|
||||
</map>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="batched__reduction__traits_8h_source.html">Go to the source code of this file.</a></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
||||
Classes</h2></td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1reduction_1_1BatchedReductionTraits.html">cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ ></a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1reduction_1_1BatchedReductionTraits_1_1Params.html">cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Params</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table><table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
|
||||
Namespaces</h2></td></tr>
|
||||
<tr class="memitem:namespacecutlass"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass.html">cutlass</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:namespacecutlass_1_1reduction"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacecutlass_1_1reduction.html">cutlass::reduction</a></td></tr>
|
||||
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
1
flashinfer_0.3.1/3rdparty/cutlass/docs/batched__reduction__traits_8h__incl.md5
vendored
Normal file
1
flashinfer_0.3.1/3rdparty/cutlass/docs/batched__reduction__traits_8h__incl.md5
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
957af6c3e40d98d122a3ef83474f7252
|
||||
150
flashinfer_0.3.1/3rdparty/cutlass/docs/batched__reduction__traits_8h_source.html
vendored
Normal file
150
flashinfer_0.3.1/3rdparty/cutlass/docs/batched__reduction__traits_8h_source.html
vendored
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,133 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::AlignedArray< T, N, Alignment > Class Template Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1AlignedArray.html">AlignedArray</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::AlignedArray< T, N, Alignment > Class Template Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Aligned array type.
|
||||
</p>
|
||||
|
||||
<p><code>#include <<a class="el" href="array_8h_source.html">array.h</a>></code></p>
|
||||
<div class="dynheader">
|
||||
Inheritance diagram for cutlass::AlignedArray< T, N, Alignment >:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="classcutlass_1_1AlignedArray__inherit__graph.png" border="0" usemap="#cutlass_1_1AlignedArray_3_01T_00_01N_00_01Alignment_01_4_inherit__map" alt="Inheritance graph"/></div>
|
||||
<map name="cutlass_1_1AlignedArray_3_01T_00_01N_00_01Alignment_01_4_inherit__map" id="cutlass_1_1AlignedArray_3_01T_00_01N_00_01Alignment_01_4_inherit__map">
|
||||
</map>
|
||||
<center><span class="legend">[<a href="graph_legend.html">legend</a>]</span></center></div>
|
||||
<div class="dynheader">
|
||||
Collaboration diagram for cutlass::AlignedArray< T, N, Alignment >:</div>
|
||||
<div class="dyncontent">
|
||||
<div class="center"><img src="classcutlass_1_1AlignedArray__coll__graph.png" border="0" usemap="#cutlass_1_1AlignedArray_3_01T_00_01N_00_01Alignment_01_4_coll__map" alt="Collaboration graph"/></div>
|
||||
<map name="cutlass_1_1AlignedArray_3_01T_00_01N_00_01Alignment_01_4_coll__map" id="cutlass_1_1AlignedArray_3_01T_00_01N_00_01Alignment_01_4_coll__map">
|
||||
</map>
|
||||
<center><span class="legend">[<a href="graph_legend.html">legend</a>]</span></center></div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array_8h_source.html">array.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
1
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1AlignedArray__coll__graph.md5
vendored
Normal file
1
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1AlignedArray__coll__graph.md5
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
5bfb78a70e6c0c4f1dba98d2cf455a30
|
||||
1
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1AlignedArray__inherit__graph.md5
vendored
Normal file
1
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1AlignedArray__inherit__graph.md5
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
5bfb78a70e6c0c4f1dba98d2cf455a30
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false > Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ac37d0c85dd6246ff7e08d12903f49c4d">Array</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a5d4667c3c9ebf3322ba94d43421e2577">Array</a>(Array const &x)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a6268f2bbbdfc671cf7066ea0ee1bb46f">at</a>(size_type pos)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a0443a4af7c9594492bfb8a84bbd12a52">at</a>(size_type pos) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a693677ee48012a4d013d55741d38764e">back</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a2c1665d0eff4c1788b0a5a3bfa3bc63e">back</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a6e9dbf4a486f07dc72dd5140a7628971">begin</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a86a56cc907c8566068034ef8294cf7c2">cbegin</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ae6106b72ee9035389afb313801561b16">cend</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a5b84c4dc5257f31108a0598915f03f94">clear</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a8a90423fc5483b3ee1d31f377321e9e0">const_pointer</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a01b9f76c6052dc2467095b91c1ebe34e">crbegin</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#abbc436f18649c1578ef95eb501872094">crend</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a1949c8a8c81dc2743328a56ff19fc933">data</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ab617ed6c9cc6336baf1030713d6dfbbb">data</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#af8dd11bf19216707ab3340b66833c9c9">difference_type</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a700940b7ec4aa2c10506b8109b58b709">Element</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a40829269d53d097b5b7bfce32e4afcc4">empty</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a80258d6b5e43ae529cd726f0d4292619">end</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a1c31d3673a48b2ed275bd56714fbcfbe">fill</a>(T const &value)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#aa89dd0781c0a81421589182a5402df8b">front</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ab7ebd33505e48ab3beb6b551e8b762e5">front</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a56c28da772c3cf49799eeef4ee1eb981">kElements</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a4a6f489743eb03c5c97fe6bb3ed2fa22">kElementsPerStoredItem</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a6981c3aa259d3a1cc4818e29fa1d1423">kMask</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a45932cad6b905c9ab72889c53112d529">kSizeBits</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#afbe4f574d87e61bf18ac5b9f5a6ea8aa">kStorageElements</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a8f982c95366ce4fda90e35281adfe63c">max_size</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#aeaeeb7bddb6824adc6feb5ab912d65dc">operator[]</a>(size_type pos)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a35db1c6ac0d42a486eb3a0a0eee95c80">operator[]</a>(size_type pos) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a2a77712281a0ddbf880a4f6fb9aa2ea3">pointer</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a66e2465301e46afebf9e56c4060fb3cb">raw_data</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a16e55f7c4ae1700ae09c2bce137d06ae">raw_data</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a2098c88aed61f9b27bac37a083130336">rbegin</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a39c08a75c7cc22fcd296e6c9fefe754e">rend</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ae1b48e77c8381a8059a09a791d6b8d37">size</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a714f3275de8a7f9d14f8b04aed45988d">size_type</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ac1a07d3bbf76e850a948c8efe864acdb">value_type</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">cutlass::Array< T, N, false ></a></td><td class="entry"></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
1134
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html
vendored
Normal file
1134
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html
vendored
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,123 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">const_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::const_iterator Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a2baacc6de7180213621a2d6b2328ca7d">const_iterator</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a273a0ea9cf66fac0787e90339fd49371">const_iterator</a>(Storage const *ptr, int idx=0)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#ad8a6c87e370a53e7ff783ee4ad3d1198">operator!=</a>(iterator const &other) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a36aa6aa70a9536a7d2750d83d53f39f3">operator*</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#adcdcdf49b5d8e3ed801e2555c4f02b99">operator++</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a4094d6ae6bb6ade0f850ce96870bbc37">operator++</a>(int)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#aa2c9f9bb9601208bd784bdc821b62f3a">operator--</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a3eebbf306ba37383e98360c0aa882e34">operator--</a>(int)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a42dd93a0f0df4ec86de4880fa9cc5843">operator==</a>(iterator const &other) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">cutlass::Array< T, N, false >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,386 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array< T, N, false >::const_iterator Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html">const_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::const_iterator Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Bidirectional constant iterator over elements.
|
||||
</p>
|
||||
|
||||
<p><code>#include <<a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a>></code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:a2baacc6de7180213621a2d6b2328ca7d"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a2baacc6de7180213621a2d6b2328ca7d">const_iterator</a> ()</td></tr>
|
||||
<tr class="separator:a2baacc6de7180213621a2d6b2328ca7d"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a273a0ea9cf66fac0787e90339fd49371"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a273a0ea9cf66fac0787e90339fd49371">const_iterator</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> const *ptr, int idx=0)</td></tr>
|
||||
<tr class="separator:a273a0ea9cf66fac0787e90339fd49371"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:adcdcdf49b5d8e3ed801e2555c4f02b99"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#adcdcdf49b5d8e3ed801e2555c4f02b99">operator++</a> ()</td></tr>
|
||||
<tr class="separator:adcdcdf49b5d8e3ed801e2555c4f02b99"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:aa2c9f9bb9601208bd784bdc821b62f3a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#aa2c9f9bb9601208bd784bdc821b62f3a">operator--</a> ()</td></tr>
|
||||
<tr class="separator:aa2c9f9bb9601208bd784bdc821b62f3a"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a4094d6ae6bb6ade0f850ce96870bbc37"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a4094d6ae6bb6ade0f850ce96870bbc37">operator++</a> (int)</td></tr>
|
||||
<tr class="separator:a4094d6ae6bb6ade0f850ce96870bbc37"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a3eebbf306ba37383e98360c0aa882e34"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a3eebbf306ba37383e98360c0aa882e34">operator--</a> (int)</td></tr>
|
||||
<tr class="separator:a3eebbf306ba37383e98360c0aa882e34"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a36aa6aa70a9536a7d2750d83d53f39f3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> const_reference </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a36aa6aa70a9536a7d2750d83d53f39f3">operator*</a> () const </td></tr>
|
||||
<tr class="separator:a36aa6aa70a9536a7d2750d83d53f39f3"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a42dd93a0f0df4ec86de4880fa9cc5843"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#a42dd93a0f0df4ec86de4880fa9cc5843">operator==</a> (iterator const &other) const </td></tr>
|
||||
<tr class="separator:a42dd93a0f0df4ec86de4880fa9cc5843"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ad8a6c87e370a53e7ff783ee4ad3d1198"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__iterator.html#ad8a6c87e370a53e7ff783ee4ad3d1198">operator!=</a> (iterator const &other) const </td></tr>
|
||||
<tr class="separator:ad8a6c87e370a53e7ff783ee4ad3d1198"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor & Destructor Documentation</h2>
|
||||
<a class="anchor" id="a2baacc6de7180213621a2d6b2328ca7d"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_iterator::const_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a273a0ea9cf66fac0787e90339fd49371"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_iterator::const_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> const * </td>
|
||||
<td class="paramname"><em>ptr</em>, </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramkey"></td>
|
||||
<td></td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"><em>idx</em> = <code>0</code> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>)</td>
|
||||
<td></td><td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<h2 class="groupheader">Member Function Documentation</h2>
|
||||
<a class="anchor" id="ad8a6c87e370a53e7ff783ee4ad3d1198"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool cutlass::Array< T, N, false >::const_iterator::operator!= </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">iterator const & </td>
|
||||
<td class="paramname"><em>other</em></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a36aa6aa70a9536a7d2750d83d53f39f3"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> const_reference cutlass::Array< T, N, false >::const_iterator::operator* </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="adcdcdf49b5d8e3ed801e2555c4f02b99"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator& cutlass::Array< T, N, false >::const_iterator::operator++ </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a4094d6ae6bb6ade0f850ce96870bbc37"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator cutlass::Array< T, N, false >::const_iterator::operator++ </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="aa2c9f9bb9601208bd784bdc821b62f3a"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator& cutlass::Array< T, N, false >::const_iterator::operator-- </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a3eebbf306ba37383e98360c0aa882e34"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> iterator cutlass::Array< T, N, false >::const_iterator::operator-- </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a42dd93a0f0df4ec86de4880fa9cc5843"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool cutlass::Array< T, N, false >::const_iterator::operator== </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">iterator const & </td>
|
||||
<td class="paramname"><em>other</em></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">const_reference</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::const_reference Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#abf1841f0ac863891efcf23bd5ac57847">const_reference</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#ac9e3b9e2f5797efbc47e3415aa204079">const_reference</a>(Storage const *ptr, int idx=0)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#a37a90c6f1edcc3d7a916211aa7520cc1">get</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#afa022bf34a7086c43b5bd45b40c2b25f">operator float</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">explicit</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#a61648afeb4e15881fb001611c37df1ec">operator int</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">explicit</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#a7c5f7d59a22d89a7dd5c923d9bcebd97">operator T</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">cutlass::Array< T, N, false >::const_reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,306 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array< T, N, false >::const_reference Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html">const_reference</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::const_reference Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Reference object extracts sub-byte items.
|
||||
</p>
|
||||
|
||||
<p><code>#include <<a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a>></code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:abf1841f0ac863891efcf23bd5ac57847"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#abf1841f0ac863891efcf23bd5ac57847">const_reference</a> ()</td></tr>
|
||||
<tr class="memdesc:abf1841f0ac863891efcf23bd5ac57847"><td class="mdescLeft"> </td><td class="mdescRight">Default ctor. <a href="#abf1841f0ac863891efcf23bd5ac57847">More...</a><br /></td></tr>
|
||||
<tr class="separator:abf1841f0ac863891efcf23bd5ac57847"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ac9e3b9e2f5797efbc47e3415aa204079"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#ac9e3b9e2f5797efbc47e3415aa204079">const_reference</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> const *ptr, int idx=0)</td></tr>
|
||||
<tr class="memdesc:ac9e3b9e2f5797efbc47e3415aa204079"><td class="mdescLeft"> </td><td class="mdescRight">Ctor. <a href="#ac9e3b9e2f5797efbc47e3415aa204079">More...</a><br /></td></tr>
|
||||
<tr class="separator:ac9e3b9e2f5797efbc47e3415aa204079"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a37a90c6f1edcc3d7a916211aa7520cc1"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> const T </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#a37a90c6f1edcc3d7a916211aa7520cc1">get</a> () const </td></tr>
|
||||
<tr class="separator:a37a90c6f1edcc3d7a916211aa7520cc1"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a7c5f7d59a22d89a7dd5c923d9bcebd97"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#a7c5f7d59a22d89a7dd5c923d9bcebd97">operator T</a> () const </td></tr>
|
||||
<tr class="memdesc:a7c5f7d59a22d89a7dd5c923d9bcebd97"><td class="mdescLeft"> </td><td class="mdescRight">Extract. <a href="#a7c5f7d59a22d89a7dd5c923d9bcebd97">More...</a><br /></td></tr>
|
||||
<tr class="separator:a7c5f7d59a22d89a7dd5c923d9bcebd97"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a61648afeb4e15881fb001611c37df1ec"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#a61648afeb4e15881fb001611c37df1ec">operator int</a> () const </td></tr>
|
||||
<tr class="memdesc:a61648afeb4e15881fb001611c37df1ec"><td class="mdescLeft"> </td><td class="mdescRight">Explicit cast to int. <a href="#a61648afeb4e15881fb001611c37df1ec">More...</a><br /></td></tr>
|
||||
<tr class="separator:a61648afeb4e15881fb001611c37df1ec"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:afa022bf34a7086c43b5bd45b40c2b25f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reference.html#afa022bf34a7086c43b5bd45b40c2b25f">operator float</a> () const </td></tr>
|
||||
<tr class="memdesc:afa022bf34a7086c43b5bd45b40c2b25f"><td class="mdescLeft"> </td><td class="mdescRight">Explicit cast to float. <a href="#afa022bf34a7086c43b5bd45b40c2b25f">More...</a><br /></td></tr>
|
||||
<tr class="separator:afa022bf34a7086c43b5bd45b40c2b25f"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor & Destructor Documentation</h2>
|
||||
<a class="anchor" id="abf1841f0ac863891efcf23bd5ac57847"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_reference::const_reference </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ac9e3b9e2f5797efbc47e3415aa204079"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_reference::const_reference </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> const * </td>
|
||||
<td class="paramname"><em>ptr</em>, </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramkey"></td>
|
||||
<td></td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"><em>idx</em> = <code>0</code> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>)</td>
|
||||
<td></td><td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<h2 class="groupheader">Member Function Documentation</h2>
|
||||
<a class="anchor" id="a37a90c6f1edcc3d7a916211aa7520cc1"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> const T cutlass::Array< T, N, false >::const_reference::get </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="afa022bf34a7086c43b5bd45b40c2b25f"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_reference::operator float </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">explicit</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a61648afeb4e15881fb001611c37df1ec"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_reference::operator int </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">explicit</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a7c5f7d59a22d89a7dd5c923d9bcebd97"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::const_reference::operator T </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html">const_reverse_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::const_reverse_iterator Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html">cutlass::Array< T, N, false >::const_reverse_iterator</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html#aae7705a26ea52ebd18d5f5809d816ee2">const_reverse_iterator</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html">cutlass::Array< T, N, false >::const_reverse_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html#a4bef88847b70f6bca81dd46bd883373b">const_reverse_iterator</a>(Storage const *ptr, int idx=0)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html">cutlass::Array< T, N, false >::const_reverse_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array< T, N, false >::const_reverse_iterator Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array&lt; T, N, false &gt;</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html">const_reverse_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array&lt; T, N, false &gt;::const_reverse_iterator Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Bidirectional constant iterator over elements.
|
||||
</p>
|
||||
|
||||
<p><code>#include &lt;<a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a>&gt;</code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:aae7705a26ea52ebd18d5f5809d816ee2"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html#aae7705a26ea52ebd18d5f5809d816ee2">const_reverse_iterator</a> ()</td></tr>
|
||||
<tr class="separator:aae7705a26ea52ebd18d5f5809d816ee2"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a4bef88847b70f6bca81dd46bd883373b"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1const__reverse__iterator.html#a4bef88847b70f6bca81dd46bd883373b">const_reverse_iterator</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> const *ptr, int idx=0)</td></tr>
|
||||
<tr class="separator:a4bef88847b70f6bca81dd46bd883373b"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
|
||||
<a class="anchor" id="aae7705a26ea52ebd18d5f5809d816ee2"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array&lt; T, N, false &gt;::const_reverse_iterator::const_reverse_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a4bef88847b70f6bca81dd46bd883373b"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array&lt; T, N, false &gt;::const_reverse_iterator::const_reverse_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> const * </td>
|
||||
<td class="paramname"><em>ptr</em>, </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramkey"></td>
|
||||
<td></td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"><em>idx</em> = <code>0</code> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>)</td>
|
||||
<td></td><td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array&lt; T, N, false &gt;</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array&lt; T, N, false &gt;::iterator Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#af7a5f107d79655c43e2f2a42d05a6014">iterator</a>(Storage *ptr, int idx=0)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ae72e0d7919ac6d40e1d4f8ce5458af1e">operator!=</a>(iterator const &amp;other) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ac43741ba9bcacd11dfb91fe02c57bef5">operator*</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#a3fa26abda72f9714e39af23bcb5f97df">operator++</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#a3b5e8ff9cb4e7875a6cc26403400d7c3">operator++</a>(int)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#a16e57c02d414c3a5591e289c3fd01a22">operator--</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ae0fd752d82e67eb74ace86cfdaa69020">operator--</a>(int)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ac6173c654b3cc22cb357e5ad847dffc9">operator==</a>(iterator const &amp;other) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">cutlass::Array&lt; T, N, false &gt;::iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,386 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array&lt; T, N, false &gt;::iterator Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array&lt; T, N, false &gt;</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html">iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array&lt; T, N, false &gt;::iterator Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Bidirectional iterator over elements.
|
||||
</p>
|
||||
|
||||
<p><code>#include &lt;<a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a>&gt;</code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:adb69680f23a0ba9bbe107900fa537228"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> ()</td></tr>
|
||||
<tr class="separator:adb69680f23a0ba9bbe107900fa537228"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:af7a5f107d79655c43e2f2a42d05a6014"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#af7a5f107d79655c43e2f2a42d05a6014">iterator</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> *ptr, int idx=0)</td></tr>
|
||||
<tr class="separator:af7a5f107d79655c43e2f2a42d05a6014"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a3fa26abda72f9714e39af23bcb5f97df"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#a3fa26abda72f9714e39af23bcb5f97df">operator++</a> ()</td></tr>
|
||||
<tr class="separator:a3fa26abda72f9714e39af23bcb5f97df"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a16e57c02d414c3a5591e289c3fd01a22"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#a16e57c02d414c3a5591e289c3fd01a22">operator--</a> ()</td></tr>
|
||||
<tr class="separator:a16e57c02d414c3a5591e289c3fd01a22"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a3b5e8ff9cb4e7875a6cc26403400d7c3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#a3b5e8ff9cb4e7875a6cc26403400d7c3">operator++</a> (int)</td></tr>
|
||||
<tr class="separator:a3b5e8ff9cb4e7875a6cc26403400d7c3"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ae0fd752d82e67eb74ace86cfdaa69020"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ae0fd752d82e67eb74ace86cfdaa69020">operator--</a> (int)</td></tr>
|
||||
<tr class="separator:ae0fd752d82e67eb74ace86cfdaa69020"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ac43741ba9bcacd11dfb91fe02c57bef5"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> reference </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ac43741ba9bcacd11dfb91fe02c57bef5">operator*</a> () const </td></tr>
|
||||
<tr class="separator:ac43741ba9bcacd11dfb91fe02c57bef5"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ac6173c654b3cc22cb357e5ad847dffc9"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ac6173c654b3cc22cb357e5ad847dffc9">operator==</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> const &other) const </td></tr>
|
||||
<tr class="separator:ac6173c654b3cc22cb357e5ad847dffc9"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ae72e0d7919ac6d40e1d4f8ce5458af1e"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#ae72e0d7919ac6d40e1d4f8ce5458af1e">operator!=</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> const &other) const </td></tr>
|
||||
<tr class="separator:ae72e0d7919ac6d40e1d4f8ce5458af1e"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
|
||||
<a class="anchor" id="adb69680f23a0ba9bbe107900fa537228"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array&lt; T, N, false &gt;::iterator::iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="af7a5f107d79655c43e2f2a42d05a6014"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array&lt; T, N, false &gt;::iterator::iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> * </td>
|
||||
<td class="paramname"><em>ptr</em>, </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramkey"></td>
|
||||
<td></td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"><em>idx</em> = <code>0</code> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>)</td>
|
||||
<td></td><td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<h2 class="groupheader">Member Function Documentation</h2>
|
||||
<a class="anchor" id="ae72e0d7919ac6d40e1d4f8ce5458af1e"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool cutlass::Array&lt; T, N, false &gt;::iterator::operator!= </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> const & </td>
|
||||
<td class="paramname"><em>other</em></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ac43741ba9bcacd11dfb91fe02c57bef5"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> reference cutlass::Array&lt; T, N, false &gt;::iterator::operator* </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a3fa26abda72f9714e39af23bcb5f97df"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a>& cutlass::Array< T, N, false >::iterator::operator++ </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a3b5e8ff9cb4e7875a6cc26403400d7c3"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> cutlass::Array< T, N, false >::iterator::operator++ </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a16e57c02d414c3a5591e289c3fd01a22"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a>& cutlass::Array< T, N, false >::iterator::operator-- </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ae0fd752d82e67eb74ace86cfdaa69020"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> cutlass::Array< T, N, false >::iterator::operator-- </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ac6173c654b3cc22cb357e5ad847dffc9"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool cutlass::Array< T, N, false >::iterator::operator== </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1iterator.html#adb69680f23a0ba9bbe107900fa537228">iterator</a> const &amp; </td>
|
||||
<td class="paramname"><em>other</em></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">reference</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array&lt; T, N, false &gt;::reference Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array&lt; T, N, false &gt;::reference</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a3bb74e5ee555773803b39cc478af5069">get</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#ac800b24861e676e21c7d6201338175bc">operator float</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">explicit</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#ad50d57afc8e33ce406f31dc15564cb3a">operator int</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">explicit</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#ac51b14ff76b80e7f7bf4142a1af01d82">operator T</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a629bfbf64481de5252896b45721254ad">operator=</a>(T x)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a257c25bee7fa54ff1d492bc0697b05cc">reference</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a2c2ac2556e27f48703a9bc1c4e6ed2aa">reference</a>(Storage *ptr, int idx=0)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">cutlass::Array< T, N, false >::reference</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,335 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array&lt; T, N, false &gt;::reference Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html">reference</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array&lt; T, N, false &gt;::reference Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Reference object inserts or extracts sub-byte items.
|
||||
</p>
|
||||
|
||||
<p><code>#include &lt;<a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a>&gt;</code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:a257c25bee7fa54ff1d492bc0697b05cc"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a257c25bee7fa54ff1d492bc0697b05cc">reference</a> ()</td></tr>
|
||||
<tr class="memdesc:a257c25bee7fa54ff1d492bc0697b05cc"><td class="mdescLeft"> </td><td class="mdescRight">Default ctor. <a href="#a257c25bee7fa54ff1d492bc0697b05cc">More...</a><br /></td></tr>
|
||||
<tr class="separator:a257c25bee7fa54ff1d492bc0697b05cc"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a2c2ac2556e27f48703a9bc1c4e6ed2aa"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a2c2ac2556e27f48703a9bc1c4e6ed2aa">reference</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> *ptr, int idx=0)</td></tr>
|
||||
<tr class="memdesc:a2c2ac2556e27f48703a9bc1c4e6ed2aa"><td class="mdescLeft"> </td><td class="mdescRight">Ctor. <a href="#a2c2ac2556e27f48703a9bc1c4e6ed2aa">More...</a><br /></td></tr>
|
||||
<tr class="separator:a2c2ac2556e27f48703a9bc1c4e6ed2aa"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a629bfbf64481de5252896b45721254ad"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a257c25bee7fa54ff1d492bc0697b05cc">reference</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a629bfbf64481de5252896b45721254ad">operator=</a> (T x)</td></tr>
|
||||
<tr class="memdesc:a629bfbf64481de5252896b45721254ad"><td class="mdescLeft"> </td><td class="mdescRight">Assignment. <a href="#a629bfbf64481de5252896b45721254ad">More...</a><br /></td></tr>
|
||||
<tr class="separator:a629bfbf64481de5252896b45721254ad"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a3bb74e5ee555773803b39cc478af5069"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> T </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a3bb74e5ee555773803b39cc478af5069">get</a> () const </td></tr>
|
||||
<tr class="separator:a3bb74e5ee555773803b39cc478af5069"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ac51b14ff76b80e7f7bf4142a1af01d82"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#ac51b14ff76b80e7f7bf4142a1af01d82">operator T</a> () const </td></tr>
|
||||
<tr class="memdesc:ac51b14ff76b80e7f7bf4142a1af01d82"><td class="mdescLeft"> </td><td class="mdescRight">Extract. <a href="#ac51b14ff76b80e7f7bf4142a1af01d82">More...</a><br /></td></tr>
|
||||
<tr class="separator:ac51b14ff76b80e7f7bf4142a1af01d82"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ad50d57afc8e33ce406f31dc15564cb3a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#ad50d57afc8e33ce406f31dc15564cb3a">operator int</a> () const </td></tr>
|
||||
<tr class="memdesc:ad50d57afc8e33ce406f31dc15564cb3a"><td class="mdescLeft"> </td><td class="mdescRight">Explicit cast to int. <a href="#ad50d57afc8e33ce406f31dc15564cb3a">More...</a><br /></td></tr>
|
||||
<tr class="separator:ad50d57afc8e33ce406f31dc15564cb3a"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ac800b24861e676e21c7d6201338175bc"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#ac800b24861e676e21c7d6201338175bc">operator float</a> () const </td></tr>
|
||||
<tr class="memdesc:ac800b24861e676e21c7d6201338175bc"><td class="mdescLeft"> </td><td class="mdescRight">Explicit cast to float. <a href="#ac800b24861e676e21c7d6201338175bc">More...</a><br /></td></tr>
|
||||
<tr class="separator:ac800b24861e676e21c7d6201338175bc"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
|
||||
<a class="anchor" id="a257c25bee7fa54ff1d492bc0697b05cc"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reference::reference </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a2c2ac2556e27f48703a9bc1c4e6ed2aa"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reference::reference </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> * </td>
|
||||
<td class="paramname"><em>ptr</em>, </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramkey"></td>
|
||||
<td></td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"><em>idx</em> = <code>0</code> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>)</td>
|
||||
<td></td><td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<h2 class="groupheader">Member Function Documentation</h2>
|
||||
<a class="anchor" id="a3bb74e5ee555773803b39cc478af5069"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> T cutlass::Array< T, N, false >::reference::get </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ac800b24861e676e21c7d6201338175bc"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reference::operator float </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">explicit</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ad50d57afc8e33ce406f31dc15564cb3a"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reference::operator int </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">explicit</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ac51b14ff76b80e7f7bf4142a1af01d82"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reference::operator T </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a629bfbf64481de5252896b45721254ad"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template&lt;typename T , int N&gt; </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reference.html#a257c25bee7fa54ff1d492bc0697b05cc">reference</a>& cutlass::Array< T, N, false >::reference::operator= </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">T </td>
|
||||
<td class="paramname"><em>x</em></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html">reverse_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::reverse_iterator Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html">cutlass::Array< T, N, false >::reverse_iterator</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html#a539eda60222f630592b9914b51307ea1">reverse_iterator</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html">cutlass::Array< T, N, false >::reverse_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html#a939c336c7c727748d9efcd5efa066a88">reverse_iterator</a>(Storage *ptr, int idx=0)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html">cutlass::Array< T, N, false >::reverse_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array< T, N, false >::reverse_iterator Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html">Array< T, N, false ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html">reverse_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, false >::reverse_iterator Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Bidirectional iterator over elements.
|
||||
</p>
|
||||
|
||||
<p><code>#include <<a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a>></code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:a539eda60222f630592b9914b51307ea1"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html#a539eda60222f630592b9914b51307ea1">reverse_iterator</a> ()</td></tr>
|
||||
<tr class="separator:a539eda60222f630592b9914b51307ea1"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a939c336c7c727748d9efcd5efa066a88"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4_1_1reverse__iterator.html#a939c336c7c727748d9efcd5efa066a88">reverse_iterator</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> *ptr, int idx=0)</td></tr>
|
||||
<tr class="separator:a939c336c7c727748d9efcd5efa066a88"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor & Destructor Documentation</h2>
|
||||
<a class="anchor" id="a539eda60222f630592b9914b51307ea1"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reverse_iterator::reverse_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a939c336c7c727748d9efcd5efa066a88"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, false >::reverse_iterator::reverse_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a878e152905d602bcdb98e0e6acd8bd82">Storage</a> * </td>
|
||||
<td class="paramname"><em>ptr</em>, </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramkey"></td>
|
||||
<td></td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"><em>idx</em> = <code>0</code> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>)</td>
|
||||
<td></td><td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array__subbyte_8h_source.html">array_subbyte.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">Array< T, N, true ></a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, true > Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a168315948a76d6ae9d7491ad0e1ca302">Array</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a22af701f6f542b29198c759b653d3fb0">Array</a>(Array const &x)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#aedd3e189bcbbb69ecd98978bcbbc3f1f">at</a>(size_type pos)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ab504833fe30934eeb6e71e235e7942f1">at</a>(size_type pos) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#aa193b8e73b93639f84224d1fea46330d">back</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a6c81a715431cf5a772c2273362df97fd">back</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#acf5a84cce457d31be7d30c57ab52f64c">begin</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a815d434e9da9715a115896b3f6e64608">cbegin</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a27e663ee5e22d4af436588a500a6cc0c">cend</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ae67b1d98a446384fc75a1c92474e719d">clear</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a319dba33ebc8556e58f699f32c6a391b">const_pointer</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ad64094119b89bb538cd1c1ea979c7954">const_reference</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ab1813941489bef9563cc0bc3f647b2ca">crbegin</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a76e1b5d728b155f9d967a43c0cc3b0dd">crend</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#af47ab51582aa1e4c811a9e111b594556">data</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a3d3d2637b7051145a2048cff1b55c0bf">data</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a7ffe7541c2cadd34bc6e65ad351772ce">difference_type</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a7bf5b693d01e004852c642400d0e9b89">Element</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a5d1028cb678773f861add6b47f13de78">empty</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a8e5c83ff2ad6bbfeb5ba0e3c04e3843a">end</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a0b3f29a6d79dd9cd55de367c96ecfc5c">fill</a>(T const &value)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a5adbb5bb00cca5e538cd1215d1de08a4">front</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a0a692495c5f7a7d098e60b9292a07e4f">front</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a59927c40660b5f39218f5867d4158e5e">kElements</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#aff4b09f36ec3f8861ebd2db338a298b2">kStorageElements</a></td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">static</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a3391b79db2b9f3bac9576c9bc7af0402">max_size</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a0fea9a8e9f9def4c0059bba750a95167">operator[]</a>(size_type pos)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a9812d796007116dbd8b20117976deb48">operator[]</a>(size_type pos) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a949beb7b21ad69d3a3bc394235dd8ec0">pointer</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ae4e76ed2b36a4deda6ef36b00fdda363">raw_data</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a90aaac40587e3ae5622030e999995f40">raw_data</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ad8ec17a6d004cb6ffd4450c0686cd924">rbegin</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a5827968c9c3deca639f5981ad895fe67">reference</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a6081f288dfc7b60da8d00913be8e83db">rend</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ac01c21b1956b645165150cfd0d0b0277">size</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#ad0117378d6f0eda984b974ca760ae984">size_type</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a70e53f314dc7b7bb6050486d18c14b31">Storage</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a9109f9dc42faa978ac2f846b98b29eb9">value_type</a> typedef</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">cutlass::Array< T, N, true ></a></td><td class="entry"></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
1086
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html
vendored
Normal file
1086
flashinfer_0.3.1/3rdparty/cutlass/docs/classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html
vendored
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,123 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: Member List</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">Array< T, N, true ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">const_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, true >::const_iterator Member List</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>This is the complete list of members for <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a>, including all inherited members.</p>
|
||||
<table class="directory">
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a56cb84bfcb97eeeae472f03fc203d759">const_iterator</a>(T const *_ptr)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a6a6e0f4caab421bbe90a94d199df0281">operator!=</a>(const_iterator const &other) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#af34c15c6d1d13db36ffe4b112bc75d47">operator*</a>() const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#ae148a1e543b22c1c4ec20374bc8929b3">operator++</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#aa9e22b7054da29fc4863051f2bb05ff7">operator++</a>(int)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a39085604c1b7a0dee3f5a0b96776d297">operator--</a>()</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#afc73e87dfebf9990e76aa47de2d30311">operator--</a>(int)</td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
<tr class="even"><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a25c770f60b9e8f8d7eb2e58efcb7c3e1">operator==</a>(const_iterator const &other) const </td><td class="entry"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">cutlass::Array< T, N, true >::const_iterator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
|
||||
</table></div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,376 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
||||
<meta name="generator" content="Doxygen 1.8.11"/>
|
||||
<title>CUTLASS: cutlass::Array< T, N, true >::const_iterator Class Reference</title>
|
||||
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="jquery.js"></script>
|
||||
<script type="text/javascript" src="dynsections.js"></script>
|
||||
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
||||
<script type="text/javascript" src="search/searchdata.js"></script>
|
||||
<script type="text/javascript" src="search/search.js"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() { init_search(); });
|
||||
</script>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
extensions: ["tex2jax.js"],
|
||||
jax: ["input/TeX","output/HTML-CSS"],
|
||||
});
|
||||
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
||||
<div id="titlearea">
|
||||
<table cellspacing="0" cellpadding="0">
|
||||
<tbody>
|
||||
<tr style="height: 56px;">
|
||||
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">CUTLASS
|
||||
</div>
|
||||
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- end header part -->
|
||||
<!-- Generated by Doxygen 1.8.11 -->
|
||||
<script type="text/javascript">
|
||||
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
||||
</script>
|
||||
<div id="navrow1" class="tabs">
|
||||
<ul class="tablist">
|
||||
<li><a href="index.html"><span>Main Page</span></a></li>
|
||||
<li><a href="modules.html"><span>Modules</span></a></li>
|
||||
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
||||
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
||||
<li><a href="files.html"><span>Files</span></a></li>
|
||||
<li>
|
||||
<div id="MSearchBox" class="MSearchBoxInactive">
|
||||
<span class="left">
|
||||
<img id="MSearchSelect" src="search/mag_sel.png"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
alt=""/>
|
||||
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
||||
onfocus="searchBox.OnSearchFieldFocus(true)"
|
||||
onblur="searchBox.OnSearchFieldFocus(false)"
|
||||
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
||||
</span><span class="right">
|
||||
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="navrow2" class="tabs2">
|
||||
<ul class="tablist">
|
||||
<li><a href="annotated.html"><span>Class List</span></a></li>
|
||||
<li><a href="classes.html"><span>Class Index</span></a></li>
|
||||
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
||||
<li><a href="functions.html"><span>Class Members</span></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<!-- window showing the filter options -->
|
||||
<div id="MSearchSelectWindow"
|
||||
onmouseover="return searchBox.OnSearchSelectShow()"
|
||||
onmouseout="return searchBox.OnSearchSelectHide()"
|
||||
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
||||
</div>
|
||||
|
||||
<!-- iframe showing the search results (closed by default) -->
|
||||
<div id="MSearchResultsWindow">
|
||||
<iframe src="javascript:void(0)" frameborder="0"
|
||||
name="MSearchResults" id="MSearchResults">
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
<div id="nav-path" class="navpath">
|
||||
<ul>
|
||||
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html">Array< T, N, true ></a></li><li class="navelem"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html">const_iterator</a></li> </ul>
|
||||
</div>
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="summary">
|
||||
<a href="#pub-methods">Public Member Functions</a> |
|
||||
<a href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator-members.html">List of all members</a> </div>
|
||||
<div class="headertitle">
|
||||
<div class="title">cutlass::Array< T, N, true >::const_iterator Class Reference</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
|
||||
<p>Bidirectional constant iterator over elements.
|
||||
</p>
|
||||
|
||||
<p><code>#include <<a class="el" href="array_8h_source.html">array.h</a>></code></p>
|
||||
<table class="memberdecls">
|
||||
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
||||
Public Member Functions</h2></td></tr>
|
||||
<tr class="memitem:a40f18ab5962efa95ac4ae4f5140c5d7b"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> ()</td></tr>
|
||||
<tr class="separator:a40f18ab5962efa95ac4ae4f5140c5d7b"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a56cb84bfcb97eeeae472f03fc203d759"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a56cb84bfcb97eeeae472f03fc203d759">const_iterator</a> (T const *_ptr)</td></tr>
|
||||
<tr class="separator:a56cb84bfcb97eeeae472f03fc203d759"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:ae148a1e543b22c1c4ec20374bc8929b3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#ae148a1e543b22c1c4ec20374bc8929b3">operator++</a> ()</td></tr>
|
||||
<tr class="separator:ae148a1e543b22c1c4ec20374bc8929b3"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a39085604c1b7a0dee3f5a0b96776d297"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a39085604c1b7a0dee3f5a0b96776d297">operator--</a> ()</td></tr>
|
||||
<tr class="separator:a39085604c1b7a0dee3f5a0b96776d297"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:aa9e22b7054da29fc4863051f2bb05ff7"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#aa9e22b7054da29fc4863051f2bb05ff7">operator++</a> (int)</td></tr>
|
||||
<tr class="separator:aa9e22b7054da29fc4863051f2bb05ff7"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:afc73e87dfebf9990e76aa47de2d30311"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#afc73e87dfebf9990e76aa47de2d30311">operator--</a> (int)</td></tr>
|
||||
<tr class="separator:afc73e87dfebf9990e76aa47de2d30311"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:af34c15c6d1d13db36ffe4b112bc75d47"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> T const & </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#af34c15c6d1d13db36ffe4b112bc75d47">operator*</a> () const </td></tr>
|
||||
<tr class="separator:af34c15c6d1d13db36ffe4b112bc75d47"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a25c770f60b9e8f8d7eb2e58efcb7c3e1"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a25c770f60b9e8f8d7eb2e58efcb7c3e1">operator==</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> const &other) const </td></tr>
|
||||
<tr class="separator:a25c770f60b9e8f8d7eb2e58efcb7c3e1"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
<tr class="memitem:a6a6e0f4caab421bbe90a94d199df0281"><td class="memItemLeft" align="right" valign="top"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a6a6e0f4caab421bbe90a94d199df0281">operator!=</a> (<a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> const &other) const </td></tr>
|
||||
<tr class="separator:a6a6e0f4caab421bbe90a94d199df0281"><td class="memSeparator" colspan="2"> </td></tr>
|
||||
</table>
|
||||
<h2 class="groupheader">Constructor & Destructor Documentation</h2>
|
||||
<a class="anchor" id="a40f18ab5962efa95ac4ae4f5140c5d7b"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, true >::const_iterator::const_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a56cb84bfcb97eeeae472f03fc203d759"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> cutlass::Array< T, N, true >::const_iterator::const_iterator </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">T const * </td>
|
||||
<td class="paramname"><em>_ptr</em></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<h2 class="groupheader">Member Function Documentation</h2>
|
||||
<a class="anchor" id="a6a6e0f4caab421bbe90a94d199df0281"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool cutlass::Array< T, N, true >::const_iterator::operator!= </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> const & </td>
|
||||
<td class="paramname"><em>other</em></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="af34c15c6d1d13db36ffe4b112bc75d47"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> T const& cutlass::Array< T, N, true >::const_iterator::operator* </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="ae148a1e543b22c1c4ec20374bc8929b3"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a>& cutlass::Array< T, N, true >::const_iterator::operator++ </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="aa9e22b7054da29fc4863051f2bb05ff7"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> cutlass::Array< T, N, true >::const_iterator::operator++ </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a39085604c1b7a0dee3f5a0b96776d297"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a>& cutlass::Array< T, N, true >::const_iterator::operator-- </td>
|
||||
<td>(</td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="afc73e87dfebf9990e76aa47de2d30311"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> <a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> cutlass::Array< T, N, true >::const_iterator::operator-- </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype">int </td>
|
||||
<td class="paramname"></td><td>)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<a class="anchor" id="a25c770f60b9e8f8d7eb2e58efcb7c3e1"></a>
|
||||
<div class="memitem">
|
||||
<div class="memproto">
|
||||
<div class="memtemplate">
|
||||
template<typename T , int N> </div>
|
||||
<table class="mlabels">
|
||||
<tr>
|
||||
<td class="mlabels-left">
|
||||
<table class="memname">
|
||||
<tr>
|
||||
<td class="memname"><a class="el" href="cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1">CUTLASS_HOST_DEVICE</a> bool cutlass::Array< T, N, true >::const_iterator::operator== </td>
|
||||
<td>(</td>
|
||||
<td class="paramtype"><a class="el" href="classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4_1_1const__iterator.html#a40f18ab5962efa95ac4ae4f5140c5d7b">const_iterator</a> const & </td>
|
||||
<td class="paramname"><em>other</em></td><td>)</td>
|
||||
<td> const</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<td class="mlabels-right">
|
||||
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div><div class="memdoc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<hr/>The documentation for this class was generated from the following file:<ul>
|
||||
<li><a class="el" href="array_8h_source.html">array.h</a></li>
|
||||
</ul>
|
||||
</div><!-- contents -->
|
||||
<!-- start footer part -->
|
||||
<hr class="footer"/><address class="footer"><small>
|
||||
Generated by  <a href="http://www.doxygen.org/index.html">
|
||||
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
||||
</a> 1.8.11
|
||||
</small></address>
|
||||
</body>
|
||||
</html>
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue