Added support for TurboQuant KV cache types (turbo3, turbo4)
This commit is contained in:
parent
71d370b63a
commit
1e6b33175a
@ -1,5 +1,5 @@
|
|||||||
#Llama Runner Configuration
|
#Llama Runner Configuration
|
||||||
#Sun Mar 29 16:13:49 CEST 2026
|
#Sun Mar 29 17:31:07 CEST 2026
|
||||||
windowHeight=1189
|
windowHeight=1189
|
||||||
windowWidth=711
|
windowWidth=711
|
||||||
windowX=1849
|
windowX=1849
|
||||||
|
|||||||
@ -6,8 +6,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -27,8 +27,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -48,8 +48,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -69,8 +69,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -90,8 +90,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -132,8 +132,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -153,8 +153,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -174,8 +174,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -195,8 +195,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
@ -216,8 +216,8 @@
|
|||||||
"threads": 99,
|
"threads": 99,
|
||||||
"flashAttention": true,
|
"flashAttention": true,
|
||||||
"kvUnified": true,
|
"kvUnified": true,
|
||||||
"cacheTypeK": "q8_0",
|
"cacheTypeK": "turbo3",
|
||||||
"cacheTypeV": "q8_0",
|
"cacheTypeV": "turbo3",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"topP": 0.95,
|
"topP": 0.95,
|
||||||
"topK": 20,
|
"topK": 20,
|
||||||
|
|||||||
@ -773,7 +773,7 @@ public class Main extends JFrame {
|
|||||||
|
|
||||||
gbc.gridx = 1;
|
gbc.gridx = 1;
|
||||||
gbc.weightx = 1.0;
|
gbc.weightx = 1.0;
|
||||||
cacheTypeKComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0" });
|
cacheTypeKComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "turbo3", "turbo4" });
|
||||||
cacheTypeKComboBox.setSelectedIndex(0);
|
cacheTypeKComboBox.setSelectedIndex(0);
|
||||||
cacheTypeKComboBox.addActionListener(e -> updateCommandPreview());
|
cacheTypeKComboBox.addActionListener(e -> updateCommandPreview());
|
||||||
panel.add(cacheTypeKComboBox, gbc);
|
panel.add(cacheTypeKComboBox, gbc);
|
||||||
@ -785,7 +785,7 @@ public class Main extends JFrame {
|
|||||||
|
|
||||||
gbc.gridx = 1;
|
gbc.gridx = 1;
|
||||||
gbc.weightx = 1.0;
|
gbc.weightx = 1.0;
|
||||||
cacheTypeVComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0" });
|
cacheTypeVComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "turbo3", "turbo4" });
|
||||||
cacheTypeVComboBox.setSelectedIndex(0);
|
cacheTypeVComboBox.setSelectedIndex(0);
|
||||||
cacheTypeVComboBox.addActionListener(e -> updateCommandPreview());
|
cacheTypeVComboBox.addActionListener(e -> updateCommandPreview());
|
||||||
panel.add(cacheTypeVComboBox, gbc);
|
panel.add(cacheTypeVComboBox, gbc);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user