FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects

Draft: InfiniBand Support (Antelope)

Open John Garbutt requested to merge arcus/2023.1-staging-ib into arcus/2023.1-production
Compare and
22 files
+ 455
35
Compare changes
  • Side-by-side
  • Inline
Files
22
+ 122
0
#!/bin/bash
# This script is NOT required when networking-mlnx is available.
# This is required ONLY when treating IB VFs as PCI passthrough.
# This bash script initialises Infiniband VFs.
# It expects an ib device followed by the number of VFs.
# This can be repeated for each ib device.
# e.g. "mlnx-ib-sriov.sh mlx5_0 8 mlx5_1 10"
# Will create 8 VFs from mlx5_0 and 10 from mlx5_1
# The VFs are assigned port and node GUIDs based on the primary IP
# of the hypervisor.
# Note that this script should only be run at boot time.
# Executing this script reinitialises existing VFs.
# If VFs are already in use, they will be disconnected.
# Strict mode: abort on an unhandled command failure (-e), treat use of an
# unset variable as an error (-u), and make a pipeline fail if any of its
# stages fails (pipefail).
set -euo pipefail
# Print one message line to stdout, prefixed with the current local time
# in "YYYY-MM-DD HH:MM:SS" form.
# Arguments: $1 - message text
log() {
  local text="$1"
  printf '%s : %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$text"
}
# Report a fatal problem through log() and terminate the script.
# Arguments: $1 - description of the failure
# Exits:     always, with status 1
error_exit() {
  local reason="$1"
  log "Error: ${reason}"
  exit 1
}
# Check one (device name, VF count) argument pair and abort through
# error_exit() if either value is malformed.
# Arguments:
#   $1 - device name; must look like mlx<digits>_<digits> (e.g. mlx5_0)
#   $2 - number of virtual functions; must consist of decimal digits only
validate_input() {
  local -r device_re='^mlx[0-9]+_[0-9]+$'
  local -r count_re='^[0-9]+$'

  [[ "$1" =~ $device_re ]] || error_exit "Invalid device name format: $1"
  [[ "$2" =~ $count_re ]] || error_exit "Invalid number of virtual functions: $2"
}
# Report whether a filesystem path exists.
# Arguments: $1 - path to test
# Outputs:   a warning through log() when the path is absent
# Returns:   0 if the path exists, 1 otherwise
check_path() {
  local target="$1"

  [[ -e "$target" ]] && return 0

  log "Warning: Path $target does not exist"
  return 1
}
#######################################
# (Re)create the SR-IOV virtual functions of one InfiniBand device and, for
# each VF, write its sysfs 'policy', 'node' (GUID) and 'port' (GUID)
# attributes, then rebind the VF to the mlx5_core driver.
#
# sriov_numvfs is first reset to 0, so existing VFs are reinitialised.
#
# Arguments:
#   $1 - device name under /sys/class/infiniband (e.g. mlx5_0)
#   $2 - number of VFs to create
# Outputs:
#   progress, warnings and errors through log()
# Returns:
#   Normally 0. A VF whose policy/node/port attribute is missing or cannot be
#   written is logged and skipped. Exits the script through error_exit() when
#   no usable IPv4 address is found, when sriov_numvfs cannot be written, when
#   a VF's PCI address cannot be resolved, or when driver unbind/bind fails.
#######################################
configure_dev() {
  local dev_name="$1"
  local num_of_vfs="$2"
  local dev_path="/sys/class/infiniband/$dev_name/device"
  local num_vfs_path="$dev_path/sriov_numvfs"
  local ip_address="" ip_hex="" ip1 ip2 ip3 ip4

  log "Configuring $dev_name with $num_of_vfs virtual functions"

  # The GUIDs embed the first non-loopback IPv4 address of the host. It is
  # the same for every VF, so resolve it once, and do so before the existing
  # VFs are torn down so that a host without an address is left untouched.
  # It is not needed when no VFs are being created.
  if (( num_of_vfs > 0 )); then
    # '|| true': an empty/failed lookup is reported explicitly just below
    # rather than letting pipefail + errexit end the script without a message.
    ip_address=$(ip -o -4 addr list | grep -v ' lo ' | awk '{print $4}' | cut -d/ -f1 | head -n 1) || true
    if [[ ! "$ip_address" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
      error_exit "Could not determine primary IPv4 address (got '$ip_address')"
    fi
    # Convert IP address to hex and pad appropriately
    IFS=. read -r ip1 ip2 ip3 ip4 <<< "$ip_address"
    ip_hex=$(printf "%02x:%02x:%02x:%02x" "$ip1" "$ip2" "$ip3" "$ip4")
  fi

  echo 0 > "$num_vfs_path" || error_exit "Failed to reset VFs for $dev_name"
  echo "$num_of_vfs" > "$num_vfs_path" || error_exit "Failed to set VFs for $dev_name"

  local vf vf_path first_part second_part guid_prefix guid_node guid_port pcie_addr
  for (( vf = 0; vf < num_of_vfs; vf++ )); do
    log " Configuring virtual function $vf"
    vf_path="$dev_path/sriov/$vf"

    if ! check_path "$vf_path/policy" || ! echo Follow > "$vf_path/policy"; then
      log "Failed to set policy for VF $vf"
      continue
    fi

    # Split the VF index into hundreds and remainder; each becomes one hex
    # octet of the GUID.
    first_part=$(( vf / 100 ))
    second_part=$(( vf % 100 ))

    # Use 00:99 as VF prefix and format the rest of the GUID
    # NOTE(review): the prefix is already 8 octets (00:99, two index octets,
    # four IP octets); appending :60 / :61 gives 9, whereas an IB GUID is
    # 64 bits. Confirm how the driver's node/port attributes parse this.
    guid_prefix=$(printf "00:99:%02x:%02x:%s" "$first_part" "$second_part" "$ip_hex")
    guid_node="${guid_prefix}:60"
    guid_port="${guid_prefix}:61"
    log "guid_node: $guid_node"
    log "guid_port: $guid_port"

    if ! check_path "$vf_path/node" || ! echo "$guid_node" > "$vf_path/node"; then
      log "Failed to write node GUID for VF $vf"
      continue
    fi
    if ! check_path "$vf_path/port" || ! echo "$guid_port" > "$vf_path/port"; then
      log "Failed to write port GUID for VF $vf"
      continue
    fi

    # The virtfnN symlink resolves to the VF's PCI device directory; its last
    # path component is the PCI address used for unbind/bind.
    # Presumably the rebind makes the driver pick up the new settings - confirm.
    pcie_addr=$(readlink -f "$dev_path/virtfn${vf}") \
      || error_exit "Failed to resolve PCI address for VF $vf"
    pcie_addr=${pcie_addr##*/}
    echo "$pcie_addr" > /sys/bus/pci/drivers/mlx5_core/unbind || error_exit "Failed to unbind driver for $pcie_addr"
    echo "$pcie_addr" > /sys/bus/pci/drivers/mlx5_core/bind || error_exit "Failed to bind driver for $pcie_addr"
  done
}
# Script entry point: treat the command line as (device, VF count) pairs and
# validate then configure each pair in order.
# Arguments: "$@" - alternating device names and VF counts
# Exits:     1 after printing help when called with no arguments, or after
#            printing usage when the argument count is odd
main() {
  if (( $# == 0 )); then
    printf '%s\n' \
      "Help:" \
      "This script expects the mlnx device and number of vfs as respective arguments." \
      "e.g. ./mlnx_ib_vf.sh mlx5_0 8"
    log "No arguments provided. Exiting"
    exit 1
  fi

  # Arguments must come in pairs.
  if (( $# % 2 != 0 )); then
    log "Usage:"
    log " $0"
    log " $0 mlx5_0 10 mlx5_1 25"
    exit 1
  fi

  log "Configuring SR-IOV for specified devices"
  local dev num_of_vfs
  while [[ $# -gt 0 ]]; do
    dev=$1
    num_of_vfs=$2
    validate_input "$dev" "$num_of_vfs"
    configure_dev "$dev" "$num_of_vfs"
    shift 2
  done
}
# Run the script: pass every command-line argument to main unchanged.
main "$@"
Loading