identify files manually by magic numbers

This commit is contained in:
🪞👃🪞 2025-03-11 14:59:11 +02:00
parent 5d7c2054b3
commit 928d38bfaa
5 changed files with 154 additions and 99 deletions

13
Cargo.lock generated
View file

@ -347,6 +347,11 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "enolib"
version = "0.5.0"
source = "git+https://codeberg.org/simonrepp/enolib-rs?tag=0.5.0#011a93092e5127c9354d643e9ac51ff592cf59f2"
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.2" version = "1.0.2"
@ -662,6 +667,12 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "opus_headers"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afbb993947f111397c2bc536944f8dac7f54a4e73383d478efe1990b56404b60"
[[package]] [[package]]
name = "pad" name = "pad"
version = "0.1.6" version = "0.1.6"
@ -1054,10 +1065,12 @@ name = "taggart"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"clap", "clap",
"enolib",
"file_type", "file_type",
"hex", "hex",
"id3", "id3",
"opener", "opener",
"opus_headers",
"pad", "pad",
"tek_tui", "tek_tui",
"unicode-width 0.2.0", "unicode-width 0.2.0",

View file

@ -5,16 +5,18 @@ edition = "2024"
[dependencies] [dependencies]
tek_tui = { git = "https://codeberg.org/unspeaker/tengri", rev = "6cd85ef" } tek_tui = { git = "https://codeberg.org/unspeaker/tengri", rev = "6cd85ef" }
enolib = { git = "https://codeberg.org/simonrepp/enolib-rs", tag = "0.5.0" }
clap = { version = "4.5.4", features = [ "cargo" ] }
walkdir = "2"
id3 = "1.16"
#moku = "0.2"
file_type = "0.7"
pad = "0.1"
#sha2 = "0.10"
hex = "0.4"
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
#base64 = "0.22" #base64 = "0.22"
unicode-width = "0.2" #moku = "0.2"
#sha2 = "0.10"
clap = { version = "4.5.4", features = [ "cargo" ] }
file_type = "0.7"
hex = "0.4"
id3 = "1.16"
opener = "0.7" opener = "0.7"
opus_headers = "0.1.2"
pad = "0.1"
unicode-width = "0.2"
walkdir = "2"
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }

View file

@ -27,7 +27,11 @@ pub(crate) const PAGE_SIZE: usize = 10;
fn cli () -> clap::Command { fn cli () -> clap::Command {
command!() command!()
.arg(arg!([path] "Path to root directory").value_parser(value_parser!(PathBuf))) .arg(arg!([path] "Path to root directory")
.value_parser(value_parser!(PathBuf)))
//.arg(arg!(-j --threads <N> "Number of indexing threads")
//.required(false)
//.value_parser(value_parser!(usize)))
} }
fn main () -> Usually<()> { fn main () -> Usually<()> {

View file

@ -13,6 +13,49 @@ pub struct Taggart {
pub editing: Option<(usize, String)>, pub editing: Option<(usize, String)>,
} }
/// One entry (file or directory) discovered while walking the root directory.
///
/// The derived ordering compares fields in declaration order:
/// `path` first, then `depth`, then `info`.
#[derive(Ord, Eq, PartialEq, PartialOrd)]
pub struct Entry {
/// Path of the entry; for files this is stored with the root prefix stripped.
pub path: PathBuf,
/// Depth of the entry as reported by the directory walker (`DirEntry::depth`).
pub depth: usize,
/// Kind-specific metadata for this entry.
pub info: EntryInfo,
}
/// Kind-specific metadata attached to an [`Entry`].
///
/// The derived ordering sorts by variant in declaration order
/// (`Directory` < `Music` < `Image` < `Unknown`), then by fields in declaration order.
#[derive(Ord, Eq, PartialEq, PartialOrd)]
pub enum EntryInfo {
/// A directory.
// NOTE(review): the `Option<()>` fields look like presence placeholders for
// per-directory metadata files; their real payload types are not defined yet.
Directory {
hash_file: Option<()>,
catalog_file: Option<()>,
artist_file: Option<()>,
release_file: Option<()>,
},
/// An audio file together with whatever tag metadata could be read from it.
Music {
/// Hex-encoded XXH3 (64-bit) digest of the file contents.
hash: Arc<str>,
/// Detected file type, if any; `EntryInfo::new` currently always stores `None`.
file_type: Option<&'static FileType>,
artist: Option<Arc<str>>,
album: Option<Arc<str>>,
track: Option<u32>,
title: Option<Arc<str>>,
date: Option<Arc<str>>,
year: Option<i32>,
people: Option<Vec<Arc<str>>>,
publisher: Option<Arc<str>>,
key: Option<Arc<str>>,
bpm: Option<Arc<str>>,
// NOTE(review): presumably flags unreadable/corrupt metadata; `EntryInfo::new`
// only ever sets it to `false` — confirm intended use.
invalid: bool,
},
/// An image file.
Image {
/// Hex-encoded XXH3 (64-bit) digest of the file contents.
hash: Arc<str>,
/// Detected file type, if any; `EntryInfo::new` currently always stores `None`.
file_type: Option<&'static FileType>,
title: Option<String>,
author: Option<String>,
// NOTE(review): presumably flags an unreadable/corrupt image; `EntryInfo::new`
// only ever sets it to `false` — confirm intended use.
invalid: bool,
},
/// Any file whose contents matched none of the recognised signatures.
Unknown {
/// Hex-encoded XXH3 (64-bit) digest of the file contents.
hash: Arc<str>,
/// Detected file type, if any; `EntryInfo::new` currently always stores `None`.
file_type: Option<&'static FileType>,
}
}
impl Taggart { impl Taggart {
pub fn new (root: &impl AsRef<Path>) -> Usually<Self> { pub fn new (root: &impl AsRef<Path>) -> Usually<Self> {
Ok(Self { Ok(Self {
@ -66,49 +109,6 @@ impl Taggart {
} }
} }
/// One entry (file or directory) discovered while walking the root directory.
///
/// The derived ordering compares fields in declaration order:
/// `path` first, then `depth`, then `info`.
#[derive(Ord, Eq, PartialEq, PartialOrd)]
pub struct Entry {
/// Path of the entry; for files this is stored with the root prefix stripped.
pub path: PathBuf,
/// Depth of the entry as reported by the directory walker (`DirEntry::depth`).
pub depth: usize,
/// Kind-specific metadata for this entry.
pub info: EntryInfo,
}
/// Kind-specific metadata attached to an [`Entry`].
///
/// The derived ordering sorts by variant in declaration order
/// (`Directory` < `Music` < `Image` < `Unknown`), then by fields in declaration order.
#[derive(Ord, Eq, PartialEq, PartialOrd)]
pub enum EntryInfo {
/// A directory.
// NOTE(review): the `Option<()>` fields look like presence placeholders for
// per-directory metadata files; their real payload types are not defined yet.
Directory {
hash_file: Option<()>,
catalog_file: Option<()>,
artist_file: Option<()>,
release_file: Option<()>,
},
/// An audio file together with whatever tag metadata could be read from it.
Music {
/// Hex-encoded XXH3 (64-bit) digest of the file contents.
hash: Arc<str>,
/// File type detected from the file contents.
file_type: &'static FileType,
artist: Option<Arc<str>>,
album: Option<Arc<str>>,
track: Option<u32>,
title: Option<Arc<str>>,
date: Option<Arc<str>>,
year: Option<i32>,
people: Option<Vec<Arc<str>>>,
publisher: Option<Arc<str>>,
key: Option<Arc<str>>,
bpm: Option<Arc<str>>,
// NOTE(review): presumably flags unreadable/corrupt metadata — confirm intended use.
invalid: bool,
},
/// An image file.
Image {
/// Hex-encoded XXH3 (64-bit) digest of the file contents.
hash: Arc<str>,
/// File type detected from the file contents.
file_type: &'static FileType,
title: Option<String>,
author: Option<String>,
// NOTE(review): presumably flags an unreadable/corrupt image — confirm intended use.
invalid: bool,
},
/// Any file whose media type is not handled specifically.
Unknown {
/// Hex-encoded XXH3 (64-bit) digest of the file contents.
hash: Arc<str>,
/// File type detected from the file contents.
file_type: &'static FileType,
}
}
impl Entry { impl Entry {
pub fn new (root: &impl AsRef<Path>, entry: &DirEntry) -> Perhaps<Self> { pub fn new (root: &impl AsRef<Path>, entry: &DirEntry) -> Perhaps<Self> {
println!("{}", entry.path().display()); println!("{}", entry.path().display());
@ -133,51 +133,10 @@ impl Entry {
})) }))
} }
fn new_file (root: &impl AsRef<Path>, entry: &DirEntry) -> Perhaps<Self> { fn new_file (root: &impl AsRef<Path>, entry: &DirEntry) -> Perhaps<Self> {
let bytes = read(entry.path())?; Ok(Some(Self {
let hash = hex::encode(xxh3_64(&bytes).to_be_bytes());
let file_type = FileType::try_from_reader(&*bytes)?;
let mime_type = file_type.media_types().get(0);
return Ok(Some(Self {
depth: entry.depth(), depth: entry.depth(),
path: entry.path().strip_prefix(root.as_ref())?.into(), path: entry.path().strip_prefix(root.as_ref())?.into(),
info: match mime_type { info: EntryInfo::new(&read(entry.path())?)?
Some(&"audio/mpeg3") => {
let id3 = Tag::read_from_path(entry.path())?;
EntryInfo::Music {
file_type,
hash: hash.into(),
artist: id3.artist().map(|x|x.into()),
album: id3.album().map(|x|x.into()),
track: id3.track().map(|x|x.into()),
title: id3.title().map(|x|x.into()),
date: None,
year: id3.year().map(|x|x.into()),
people: None,
publisher: None,
key: None,
bpm: None,
invalid: false,
}
},
Some(&"image/png") => EntryInfo::Image {
file_type,
hash: hash.into(),
title: None,
author: None,
invalid: false,
},
Some(&"image/jpeg") => EntryInfo::Image {
file_type,
hash: hash.into(),
title: None,
author: None,
invalid: false,
},
_ => EntryInfo::Unknown {
file_type,
hash: hash.into(),
}
},
})) }))
} }
pub fn is_dir (&self) -> bool { pub fn is_dir (&self) -> bool {
@ -221,3 +180,80 @@ impl Entry {
} }
} }
} }
impl EntryInfo {
    /// Classifies a file from its raw contents by checking for known magic numbers.
    ///
    /// Signatures are tried in this order; the first match wins:
    ///
    /// 1. `ID3` — treated as an MP3 carrying an ID3v2 tag. Artist, album, track,
    ///    title and year are copied from the tag into [`EntryInfo::Music`].
    /// 2. `OggS` — treated as Ogg Opus. The Opus headers are parsed (and printed
    ///    to stdout), but no metadata is extracted from them yet, so every
    ///    optional field of the resulting [`EntryInfo::Music`] is `None`.
    /// 3. The PNG signature, or one of four JPEG signatures — yields
    ///    [`EntryInfo::Image`] with no title or author.
    ///
    /// Anything else becomes [`EntryInfo::Unknown`].
    ///
    /// In every variant `hash` is the hex encoding of the big-endian bytes of the
    /// 64-bit XXH3 digest of `bytes`, `file_type` is `None`, and `invalid`
    /// (where present) is `false`.
    ///
    /// # Errors
    ///
    /// Returns an error if the data starts with `ID3` but the tag cannot be read,
    /// or starts with `OggS` but the Opus headers cannot be parsed.
    pub fn new (bytes: &[u8]) -> Usually<Self> {
        /// Start of an ID3v2 tag.
        const ID3V2: &[u8] = b"ID3";
        /// Ogg page capture pattern.
        const OGG: &[u8] = b"OggS";
        /// PNG file signature.
        const PNG: &[u8] = &[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A];
        /// JPEG start-of-image followed directly by marker `0xDB`.
        const JPEG_RAW: &[u8] = &[0xFF, 0xD8, 0xFF, 0xDB];
        /// JPEG start-of-image followed by an `0xE0` segment containing `JFIF\0`, version 1.
        const JPEG_JFIF: &[u8] = &[
            0xFF, 0xD8, 0xFF, 0xE0,
            0x00, 0x10, 0x4A, 0x46,
            0x49, 0x46, 0x00, 0x01,
        ];
        /// JPEG start-of-image followed by marker `0xEE`.
        const JPEG_EE: &[u8] = &[0xFF, 0xD8, 0xFF, 0xEE];
        /// JPEG start-of-image followed by marker `0xE1`.
        const JPEG_E1: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE1];
        /// Identifier expected at offsets 6..12 after `JPEG_E1`
        /// (offsets 4..6 are deliberately not inspected).
        const EXIF_ID: &[u8] = b"Exif\0\0";

        // Every variant stores the same content hash. Keep it lazy so that
        // nothing is hashed when tag/header parsing bails out with an error.
        let hash = || hex::encode(xxh3_64(bytes).to_be_bytes());

        // MP3 with ID3v2
        if bytes.starts_with(ID3V2) {
            // `Tag::read_from` is flagged deprecated by the id3 crate; it is kept
            // because it reads straight from the in-memory byte slice.
            #[allow(deprecated)]
            let id3 = Tag::read_from(bytes)?;
            return Ok(Self::Music {
                hash:      hash().into(),
                file_type: None,
                artist:    id3.artist().map(Into::into),
                album:     id3.album().map(Into::into),
                track:     id3.track().map(Into::into),
                title:     id3.title().map(Into::into),
                year:      id3.year().map(Into::into),
                date:      None,
                people:    None,
                publisher: None,
                key:       None,
                bpm:       None,
                invalid:   false,
            })
        }

        // Ogg (Opus)
        if bytes.starts_with(OGG) {
            // NOTE(review): any Ogg container matches the signature above; a
            // non-Opus stream presumably makes this parse fail and the error
            // propagate to the caller — confirm that is intended.
            let headers = opus_headers::parse_from_read(bytes)?;
            println!("{headers:?}");
            return Ok(Self::Music {
                hash:      hash().into(),
                file_type: None,
                artist:    None,
                album:     None,
                track:     None,
                title:     None,
                date:      None,
                year:      None,
                people:    None,
                publisher: None,
                key:       None,
                bpm:       None,
                invalid:   false,
            })
        }

        // PNG or JPG: both produce the same bare `Image` record.
        let is_png = bytes.starts_with(PNG);
        let is_jpg = bytes.starts_with(JPEG_RAW)
            || bytes.starts_with(JPEG_JFIF)
            || bytes.starts_with(JPEG_EE)
            // `get` returns `None` for inputs shorter than 12 bytes, so this
            // comparison is simply false for them and never panics.
            || (bytes.starts_with(JPEG_E1) && bytes.get(6..12) == Some(EXIF_ID));
        if is_png || is_jpg {
            return Ok(Self::Image {
                file_type: None,
                hash:      hash().into(),
                title:     None,
                author:    None,
                invalid:   false,
            })
        }

        // No recognised signature.
        Ok(Self::Unknown {
            file_type: None,
            hash:      hash().into(),
        })
    }
}

View file

@ -101,7 +101,7 @@ impl<'a> Content<TuiOut> for TreeTable<'a> {
to.fill_bg(fill, Color::Rgb(192, 128, 0)); to.fill_bg(fill, Color::Rgb(192, 128, 0));
let fill = [area.x() + x as u16, y, w, 1]; let fill = [area.x() + x as u16, y, w, 1];
to.fill_bg(fill, Color::Rgb(224, 192, 0)); to.fill_bg(fill, Color::Rgb(224, 192, 0));
if let Some((index, value)) = &self.0.editing { if let Some((_index, value)) = &self.0.editing {
let x = area.x() + if x > 0 { x + 1 } else { x } as u16; let x = area.x() + if x > 0 { x + 1 } else { x } as u16;
to.blit(&value, x, y, None) to.blit(&value, x, y, None)
} }