Improve DirectML (dml) mode performance (about 20% faster; see https://github.com/BlinkDL/ChatRWKV/pull/181)

2023-10-30 20:24:57 +08:00
parent f86b7f1f08
commit 14b90bb36b
1 changed file with 2 additions and 2 deletions
--- a/backend-python/rwkv_pip/utils.py
+++ b/backend-python/rwkv_pip/utils.py
@@ -81,8 +81,8 @@ class PIPELINE:
    def sample_logits(self, logits, temperature=1.0, top_p=0.85, top_k=0):
        probs = F.softmax(logits.float(), dim=-1)
        top_k = int(top_k)
-        if probs.device == torch.device("cpu"):
-            probs = probs.numpy()
+        if probs.device.type in ["cpu", "privateuseone"]:
+            probs = probs.cpu().numpy()
            sorted_ids = np.argsort(probs)
            sorted_probs = probs[sorted_ids][::-1]
            cumulative_probs = np.cumsum(sorted_probs)