Skip to content

Commit

Permalink
Fixes for running Phi-4 quantized. (#2714)
Browse files (browse the repository at this point in the history)
  • Loading branch information
janimo authored Jan 13, 2025
1 parent 461e8c1 commit ab7ff70
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
6 changes: 5 additions & 1 deletion candle-examples/examples/quantized-phi/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ enum Which {
/// Alternative implementation of phi-3, based on llama.
#[value(name = "phi-3b")]
Phi3b,
+ #[value(name = "phi-4")]
+ Phi4,
}

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -104,6 +106,7 @@ impl Args {
let repo = match self.which {
Which::Phi2 => "microsoft/phi-2",
Which::Phi3 | Which::Phi3b => "microsoft/Phi-3-mini-4k-instruct",
+ Which::Phi4 => "microsoft/phi-4",
};
let api = api.model(repo.to_string());
api.get("tokenizer.json")?
Expand All @@ -128,6 +131,7 @@ impl Args {
"Phi-3-mini-4k-instruct-q4.gguf",
"5eef2ce24766d31909c0b269fe90c817a8f263fb",
),
+ Which::Phi4 => ("microsoft/phi-4-gguf", "phi-4-q4.gguf", "main"),
};
let api = hf_hub::api::sync::Api::new()?;
api.repo(hf_hub::Repo::with_revision(
Expand Down Expand Up @@ -216,7 +220,7 @@ fn main() -> anyhow::Result<()> {
);
match args.which {
Which::Phi2 => Model::Phi2(Phi2::from_gguf(model, &mut file, &device)?),
- Which::Phi3 => Model::Phi3(Phi3::from_gguf(
+ Which::Phi3 | Which::Phi4 => Model::Phi3(Phi3::from_gguf(
args.use_flash_attn,
model,
&mut file,
Expand Down
2 changes: 1 addition & 1 deletion candle-transformers/src/models/quantized_phi3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl LayerWeights {
.reshape((b_sz, seq_len, self.n_head, self.head_dim))?
.transpose(1, 2)?;
let k = k
- .reshape((b_sz, seq_len, self.n_head, self.head_dim))?
+ .reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
.transpose(1, 2)?;
let v = v
.reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
Expand Down

0 comments on commit ab7ff70

Please sign in to comment.