Added support for batch_size (configurable in the UI and passed to the server as --batch-size)
This commit is contained in:
parent
f5a52a93c3
commit
71d370b63a
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,4 +2,5 @@ target
|
|||||||
.classpath
|
.classpath
|
||||||
.project
|
.project
|
||||||
.vscode
|
.vscode
|
||||||
.claude
|
.claude
|
||||||
|
.codex
|
||||||
@ -1,5 +1,5 @@
|
|||||||
#Llama Runner Configuration
|
#Llama Runner Configuration
|
||||||
#Thu Mar 26 18:27:22 CET 2026
|
#Sun Mar 29 16:13:49 CEST 2026
|
||||||
windowHeight=1189
|
windowHeight=1189
|
||||||
windowWidth=711
|
windowWidth=711
|
||||||
windowX=1849
|
windowX=1849
|
||||||
|
|||||||
@ -19,6 +19,7 @@ public class CommandBuilder {
|
|||||||
cmd.append(" --host ").append(config.getHost());
|
cmd.append(" --host ").append(config.getHost());
|
||||||
cmd.append(" --port ").append(config.getPort());
|
cmd.append(" --port ").append(config.getPort());
|
||||||
cmd.append(" --parallel ").append(config.getParallel());
|
cmd.append(" --parallel ").append(config.getParallel());
|
||||||
|
cmd.append(" --batch-size ").append(config.getBatchSize());
|
||||||
cmd.append(" -t ").append(config.getThreads());
|
cmd.append(" -t ").append(config.getThreads());
|
||||||
cmd.append(" -fa ").append(config.isFlashAttention() ? "on" : "off");
|
cmd.append(" -fa ").append(config.isFlashAttention() ? "on" : "off");
|
||||||
cmd.append(" --temp ").append(config.getTemperature());
|
cmd.append(" --temp ").append(config.getTemperature());
|
||||||
|
|||||||
@ -52,6 +52,7 @@ public class Main extends JFrame {
|
|||||||
private JTextField hostField;
|
private JTextField hostField;
|
||||||
private JTextField portField;
|
private JTextField portField;
|
||||||
private JTextField parallelField;
|
private JTextField parallelField;
|
||||||
|
private JTextField batchSizeField;
|
||||||
private JTextField threadsField;
|
private JTextField threadsField;
|
||||||
private JCheckBox flashAttnCheckBox;
|
private JCheckBox flashAttnCheckBox;
|
||||||
private JCheckBox kvUnifiedCheckBox;
|
private JCheckBox kvUnifiedCheckBox;
|
||||||
@ -332,6 +333,7 @@ public class Main extends JFrame {
|
|||||||
config.setHost(hostField.getText());
|
config.setHost(hostField.getText());
|
||||||
config.setPort(Integer.parseInt(portField.getText()));
|
config.setPort(Integer.parseInt(portField.getText()));
|
||||||
config.setParallel(Integer.parseInt(parallelField.getText()));
|
config.setParallel(Integer.parseInt(parallelField.getText()));
|
||||||
|
config.setBatchSize(Integer.parseInt(batchSizeField.getText()));
|
||||||
config.setThreads(Integer.parseInt(threadsField.getText()));
|
config.setThreads(Integer.parseInt(threadsField.getText()));
|
||||||
config.setFlashAttention(flashAttnCheckBox.isSelected());
|
config.setFlashAttention(flashAttnCheckBox.isSelected());
|
||||||
config.setKvUnified(kvUnifiedCheckBox.isSelected());
|
config.setKvUnified(kvUnifiedCheckBox.isSelected());
|
||||||
@ -365,6 +367,7 @@ public class Main extends JFrame {
|
|||||||
hostField.setText(config.getHost());
|
hostField.setText(config.getHost());
|
||||||
portField.setText(String.valueOf(config.getPort()));
|
portField.setText(String.valueOf(config.getPort()));
|
||||||
parallelField.setText(String.valueOf(config.getParallel()));
|
parallelField.setText(String.valueOf(config.getParallel()));
|
||||||
|
batchSizeField.setText(String.valueOf(config.getBatchSize()));
|
||||||
threadsField.setText(String.valueOf(config.getThreads()));
|
threadsField.setText(String.valueOf(config.getThreads()));
|
||||||
flashAttnCheckBox.setSelected(config.isFlashAttention());
|
flashAttnCheckBox.setSelected(config.isFlashAttention());
|
||||||
kvUnifiedCheckBox.setSelected(config.isKvUnified());
|
kvUnifiedCheckBox.setSelected(config.isKvUnified());
|
||||||
@ -710,6 +713,18 @@ public class Main extends JFrame {
|
|||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 1;
|
gbc.gridy = 1;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
|
panel.add(new JLabel("Batch Size:"), gbc);
|
||||||
|
|
||||||
|
gbc.gridx = 1;
|
||||||
|
gbc.weightx = 1.0;
|
||||||
|
batchSizeField = new JTextField("4096", 10);
|
||||||
|
batchSizeField.setCaretColor(Color.WHITE);
|
||||||
|
batchSizeField.addActionListener(e -> updateCommandPreview());
|
||||||
|
panel.add(batchSizeField, gbc);
|
||||||
|
|
||||||
|
gbc.gridx = 0;
|
||||||
|
gbc.gridy = 2;
|
||||||
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel("Threads:"), gbc);
|
panel.add(new JLabel("Threads:"), gbc);
|
||||||
|
|
||||||
gbc.gridx = 1;
|
gbc.gridx = 1;
|
||||||
@ -720,17 +735,17 @@ public class Main extends JFrame {
|
|||||||
panel.add(threadsField, gbc);
|
panel.add(threadsField, gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 2;
|
gbc.gridy = 3;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel(), gbc);
|
panel.add(new JLabel(), gbc);
|
||||||
|
|
||||||
gbc.gridx = 1;
|
gbc.gridx = 1;
|
||||||
gbc.gridy = 2;
|
gbc.gridy = 3;
|
||||||
gbc.weightx = 1.0;
|
gbc.weightx = 1.0;
|
||||||
panel.add(new JLabel(), gbc);
|
panel.add(new JLabel(), gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 3;
|
gbc.gridy = 4;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
flashAttnCheckBox = new JCheckBox("Flash Attention", true);
|
flashAttnCheckBox = new JCheckBox("Flash Attention", true);
|
||||||
flashAttnCheckBox.addActionListener(e -> updateCommandPreview());
|
flashAttnCheckBox.addActionListener(e -> updateCommandPreview());
|
||||||
@ -741,7 +756,7 @@ public class Main extends JFrame {
|
|||||||
panel.add(new JLabel(), gbc);
|
panel.add(new JLabel(), gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 4;
|
gbc.gridy = 5;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
kvUnifiedCheckBox = new JCheckBox("KV Unified", true);
|
kvUnifiedCheckBox = new JCheckBox("KV Unified", true);
|
||||||
kvUnifiedCheckBox.addActionListener(e -> updateCommandPreview());
|
kvUnifiedCheckBox.addActionListener(e -> updateCommandPreview());
|
||||||
@ -752,7 +767,7 @@ public class Main extends JFrame {
|
|||||||
panel.add(new JLabel(), gbc);
|
panel.add(new JLabel(), gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 5;
|
gbc.gridy = 6;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel("Cache K:"), gbc);
|
panel.add(new JLabel("Cache K:"), gbc);
|
||||||
|
|
||||||
@ -764,7 +779,7 @@ public class Main extends JFrame {
|
|||||||
panel.add(cacheTypeKComboBox, gbc);
|
panel.add(cacheTypeKComboBox, gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 6;
|
gbc.gridy = 7;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel("Cache V:"), gbc);
|
panel.add(new JLabel("Cache V:"), gbc);
|
||||||
|
|
||||||
@ -776,7 +791,7 @@ public class Main extends JFrame {
|
|||||||
panel.add(cacheTypeVComboBox, gbc);
|
panel.add(cacheTypeVComboBox, gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 7;
|
gbc.gridy = 8;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel("GPU Layers:"), gbc);
|
panel.add(new JLabel("GPU Layers:"), gbc);
|
||||||
|
|
||||||
@ -789,7 +804,7 @@ public class Main extends JFrame {
|
|||||||
|
|
||||||
// Fit parameter
|
// Fit parameter
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 8;
|
gbc.gridy = 9;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel("Fit:"), gbc);
|
panel.add(new JLabel("Fit:"), gbc);
|
||||||
|
|
||||||
@ -800,7 +815,7 @@ public class Main extends JFrame {
|
|||||||
panel.add(fitCheckBox, gbc);
|
panel.add(fitCheckBox, gbc);
|
||||||
|
|
||||||
gbc.gridx = 0;
|
gbc.gridx = 0;
|
||||||
gbc.gridy = 9;
|
gbc.gridy = 10;
|
||||||
gbc.weightx = 0;
|
gbc.weightx = 0;
|
||||||
panel.add(new JLabel("Reasoning:"), gbc);
|
panel.add(new JLabel("Reasoning:"), gbc);
|
||||||
|
|
||||||
|
|||||||
@ -7,10 +7,12 @@ import java.io.Serializable;
|
|||||||
*/
|
*/
|
||||||
public class ModelConfig implements Serializable {
|
public class ModelConfig implements Serializable {
|
||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
|
private static final int DEFAULT_BATCH_SIZE = 4096;
|
||||||
|
|
||||||
private String host;
|
private String host;
|
||||||
private int port;
|
private int port;
|
||||||
private int parallel;
|
private int parallel;
|
||||||
|
private int batchSize;
|
||||||
private int threads;
|
private int threads;
|
||||||
private boolean flashAttention;
|
private boolean flashAttention;
|
||||||
private boolean kvUnified;
|
private boolean kvUnified;
|
||||||
@ -32,6 +34,7 @@ public class ModelConfig implements Serializable {
|
|||||||
this.host = "0.0.0.0";
|
this.host = "0.0.0.0";
|
||||||
this.port = 3080;
|
this.port = 3080;
|
||||||
this.parallel = 1;
|
this.parallel = 1;
|
||||||
|
this.batchSize = DEFAULT_BATCH_SIZE;
|
||||||
this.threads = 99;
|
this.threads = 99;
|
||||||
this.flashAttention = true;
|
this.flashAttention = true;
|
||||||
this.kvUnified = true;
|
this.kvUnified = true;
|
||||||
@ -60,6 +63,9 @@ public class ModelConfig implements Serializable {
|
|||||||
public int getParallel() { return parallel; }
|
public int getParallel() { return parallel; }
|
||||||
public void setParallel(int parallel) { this.parallel = parallel; }
|
public void setParallel(int parallel) { this.parallel = parallel; }
|
||||||
|
|
||||||
|
/**
 * Returns the batch size to use.
 *
 * <p>A stored value of zero or less is never handed out; in that case
 * {@code DEFAULT_BATCH_SIZE} is returned instead.
 * NOTE(review): presumably this covers configurations saved before the
 * field existed, which would load with a value of 0 - confirm.
 *
 * @return the stored batch size when positive, otherwise the default
 */
public int getBatchSize() {
    if (batchSize <= 0) {
        return DEFAULT_BATCH_SIZE;
    }
    return batchSize;
}
|
||||||
|
/**
 * Stores the given batch size exactly as supplied; no validation or
 * clamping happens here.
 *
 * @param batchSize the batch size to store
 */
public void setBatchSize(int batchSize) { this.batchSize = batchSize; }
|
||||||
|
|
||||||
public int getThreads() { return threads; }
|
public int getThreads() { return threads; }
|
||||||
public void setThreads(int threads) { this.threads = threads; }
|
public void setThreads(int threads) { this.threads = threads; }
|
||||||
|
|
||||||
@ -114,6 +120,7 @@ public class ModelConfig implements Serializable {
|
|||||||
"host='" + host + '\'' +
|
"host='" + host + '\'' +
|
||||||
", port=" + port +
|
", port=" + port +
|
||||||
", parallel=" + parallel +
|
", parallel=" + parallel +
|
||||||
|
", batchSize=" + getBatchSize() +
|
||||||
", threads=" + threads +
|
", threads=" + threads +
|
||||||
", flashAttention=" + flashAttention +
|
", flashAttention=" + flashAttention +
|
||||||
", kvUnified=" + kvUnified +
|
", kvUnified=" + kvUnified +
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user