added support for batch_size
This commit is contained in:
parent
f5a52a93c3
commit
71d370b63a
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,4 +2,5 @@ target
|
||||
.classpath
|
||||
.project
|
||||
.vscode
|
||||
.claude
|
||||
.claude
|
||||
.codex
|
||||
@ -1,5 +1,5 @@
|
||||
#Llama Runner Configuration
|
||||
#Thu Mar 26 18:27:22 CET 2026
|
||||
#Sun Mar 29 16:13:49 CEST 2026
|
||||
windowHeight=1189
|
||||
windowWidth=711
|
||||
windowX=1849
|
||||
|
||||
@ -19,6 +19,7 @@ public class CommandBuilder {
|
||||
cmd.append(" --host ").append(config.getHost());
|
||||
cmd.append(" --port ").append(config.getPort());
|
||||
cmd.append(" --parallel ").append(config.getParallel());
|
||||
cmd.append(" --batch-size ").append(config.getBatchSize());
|
||||
cmd.append(" -t ").append(config.getThreads());
|
||||
cmd.append(" -fa ").append(config.isFlashAttention() ? "on" : "off");
|
||||
cmd.append(" --temp ").append(config.getTemperature());
|
||||
|
||||
@ -52,6 +52,7 @@ public class Main extends JFrame {
|
||||
private JTextField hostField;
|
||||
private JTextField portField;
|
||||
private JTextField parallelField;
|
||||
private JTextField batchSizeField;
|
||||
private JTextField threadsField;
|
||||
private JCheckBox flashAttnCheckBox;
|
||||
private JCheckBox kvUnifiedCheckBox;
|
||||
@ -332,6 +333,7 @@ public class Main extends JFrame {
|
||||
config.setHost(hostField.getText());
|
||||
config.setPort(Integer.parseInt(portField.getText()));
|
||||
config.setParallel(Integer.parseInt(parallelField.getText()));
|
||||
config.setBatchSize(Integer.parseInt(batchSizeField.getText()));
|
||||
config.setThreads(Integer.parseInt(threadsField.getText()));
|
||||
config.setFlashAttention(flashAttnCheckBox.isSelected());
|
||||
config.setKvUnified(kvUnifiedCheckBox.isSelected());
|
||||
@ -365,6 +367,7 @@ public class Main extends JFrame {
|
||||
hostField.setText(config.getHost());
|
||||
portField.setText(String.valueOf(config.getPort()));
|
||||
parallelField.setText(String.valueOf(config.getParallel()));
|
||||
batchSizeField.setText(String.valueOf(config.getBatchSize()));
|
||||
threadsField.setText(String.valueOf(config.getThreads()));
|
||||
flashAttnCheckBox.setSelected(config.isFlashAttention());
|
||||
kvUnifiedCheckBox.setSelected(config.isKvUnified());
|
||||
@ -710,6 +713,18 @@ public class Main extends JFrame {
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 1;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("Batch Size:"), gbc);
|
||||
|
||||
gbc.gridx = 1;
|
||||
gbc.weightx = 1.0;
|
||||
batchSizeField = new JTextField("4096", 10);
|
||||
batchSizeField.setCaretColor(Color.WHITE);
|
||||
batchSizeField.addActionListener(e -> updateCommandPreview());
|
||||
panel.add(batchSizeField, gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 2;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("Threads:"), gbc);
|
||||
|
||||
gbc.gridx = 1;
|
||||
@ -720,17 +735,17 @@ public class Main extends JFrame {
|
||||
panel.add(threadsField, gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 2;
|
||||
gbc.gridy = 3;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel(), gbc);
|
||||
|
||||
gbc.gridx = 1;
|
||||
gbc.gridy = 2;
|
||||
gbc.gridy = 3;
|
||||
gbc.weightx = 1.0;
|
||||
panel.add(new JLabel(), gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 3;
|
||||
gbc.gridy = 4;
|
||||
gbc.weightx = 0;
|
||||
flashAttnCheckBox = new JCheckBox("Flash Attention", true);
|
||||
flashAttnCheckBox.addActionListener(e -> updateCommandPreview());
|
||||
@ -741,7 +756,7 @@ public class Main extends JFrame {
|
||||
panel.add(new JLabel(), gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 4;
|
||||
gbc.gridy = 5;
|
||||
gbc.weightx = 0;
|
||||
kvUnifiedCheckBox = new JCheckBox("KV Unified", true);
|
||||
kvUnifiedCheckBox.addActionListener(e -> updateCommandPreview());
|
||||
@ -752,7 +767,7 @@ public class Main extends JFrame {
|
||||
panel.add(new JLabel(), gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 5;
|
||||
gbc.gridy = 6;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("Cache K:"), gbc);
|
||||
|
||||
@ -764,7 +779,7 @@ public class Main extends JFrame {
|
||||
panel.add(cacheTypeKComboBox, gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 6;
|
||||
gbc.gridy = 7;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("Cache V:"), gbc);
|
||||
|
||||
@ -776,7 +791,7 @@ public class Main extends JFrame {
|
||||
panel.add(cacheTypeVComboBox, gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 7;
|
||||
gbc.gridy = 8;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("GPU Layers:"), gbc);
|
||||
|
||||
@ -789,7 +804,7 @@ public class Main extends JFrame {
|
||||
|
||||
// Fit parameter
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 8;
|
||||
gbc.gridy = 9;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("Fit:"), gbc);
|
||||
|
||||
@ -800,7 +815,7 @@ public class Main extends JFrame {
|
||||
panel.add(fitCheckBox, gbc);
|
||||
|
||||
gbc.gridx = 0;
|
||||
gbc.gridy = 9;
|
||||
gbc.gridy = 10;
|
||||
gbc.weightx = 0;
|
||||
panel.add(new JLabel("Reasoning:"), gbc);
|
||||
|
||||
|
||||
@ -7,10 +7,12 @@ import java.io.Serializable;
|
||||
*/
|
||||
public class ModelConfig implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final int DEFAULT_BATCH_SIZE = 4096;
|
||||
|
||||
private String host;
|
||||
private int port;
|
||||
private int parallel;
|
||||
private int batchSize;
|
||||
private int threads;
|
||||
private boolean flashAttention;
|
||||
private boolean kvUnified;
|
||||
@ -32,6 +34,7 @@ public class ModelConfig implements Serializable {
|
||||
this.host = "0.0.0.0";
|
||||
this.port = 3080;
|
||||
this.parallel = 1;
|
||||
this.batchSize = DEFAULT_BATCH_SIZE;
|
||||
this.threads = 99;
|
||||
this.flashAttention = true;
|
||||
this.kvUnified = true;
|
||||
@ -60,6 +63,9 @@ public class ModelConfig implements Serializable {
|
||||
/**
 * Returns the configured parallel value.
 *
 * @return the current value of the {@code parallel} field
 */
public int getParallel() {
    return this.parallel;
}
|
||||
/**
 * Stores a new parallel value. The argument is assigned as-is, without validation.
 *
 * @param parallel the value to store in the {@code parallel} field
 */
public void setParallel(int parallel) {
    this.parallel = parallel;
}
|
||||
|
||||
/**
 * Returns the effective batch size.
 *
 * <p>When the stored value is zero or negative, {@code DEFAULT_BATCH_SIZE} is
 * reported instead of the stored value.
 *
 * <p>NOTE(review): the fallback presumably covers instances whose field was never
 * populated (for example, configurations saved before the field existed) - confirm.
 *
 * @return the stored batch size if it is positive, otherwise {@code DEFAULT_BATCH_SIZE}
 */
public int getBatchSize() {
    if (batchSize <= 0) {
        return DEFAULT_BATCH_SIZE;
    }
    return batchSize;
}
|
||||
/**
 * Stores the batch size exactly as given.
 *
 * <p>Non-positive values are not rejected here; {@code getBatchSize()} reports the
 * default for them instead.
 *
 * @param batchSize the value to store in the {@code batchSize} field
 */
public void setBatchSize(int batchSize) {
    this.batchSize = batchSize;
}
|
||||
|
||||
/**
 * Returns the configured thread count.
 *
 * @return the current value of the {@code threads} field
 */
public int getThreads() {
    return this.threads;
}
|
||||
/**
 * Stores a new thread count. The argument is assigned as-is, without validation.
 *
 * @param threads the value to store in the {@code threads} field
 */
public void setThreads(int threads) {
    this.threads = threads;
}
|
||||
|
||||
@ -114,6 +120,7 @@ public class ModelConfig implements Serializable {
|
||||
"host='" + host + '\'' +
|
||||
", port=" + port +
|
||||
", parallel=" + parallel +
|
||||
", batchSize=" + getBatchSize() +
|
||||
", threads=" + threads +
|
||||
", flashAttention=" + flashAttention +
|
||||
", kvUnified=" + kvUnified +
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user