# 最小化复现代码(2卡场景)
from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig
# Both engines load the same local checkpoint.
MODEL_PATH = "/root/model/Mistral-7B-Instruct-v0.3"

# NOTE(review): the producer and consumer engines are built one after the
# other in this single script; presumably each is meant to live on its own
# node/GPU -- confirm against the intended deployment.

# Producer node: kv_role "kv_producer", kv_rank 0 of kv_parallel_size 2.
producer_cli_json = (
    '{"kv_connector":"PyNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2}'
)
ktc_producer = KVTransferConfig.from_cli(producer_cli_json)
llm_producer = LLM(
    model=MODEL_PATH,
    kv_transfer_config=ktc_producer,
)

# Consumer node: kv_role "kv_consumer", kv_rank 1 of kv_parallel_size 2.
consumer_cli_json = (
    '{"kv_connector":"PyNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2}'
)
ktc_consumer = KVTransferConfig.from_cli(consumer_cli_json)
llm_consumer = LLM(
    model=MODEL_PATH,
    kv_transfer_config=ktc_consumer,
)