identify files manually by magic numbers

This commit is contained in:
🪞👃🪞 2025-03-11 14:59:11 +02:00
parent 5d7c2054b3
commit 928d38bfaa
5 changed files with 154 additions and 99 deletions

View file

@ -13,6 +13,49 @@ pub struct Taggart {
pub editing: Option<(usize, String)>,
}
/// One item found while scanning: where it is, how deep it sits, and what it is.
///
/// Field order matters: the derived ordering compares `path` first, then
/// `depth`, then `info`.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub struct Entry {
    /// Path of the item.
    pub path: PathBuf,
    /// Nesting depth of the item.
    pub depth: usize,
    /// Kind-specific details about the item.
    pub info: EntryInfo,
}
/// What kind of thing an entry is, together with the metadata known for that kind.
///
/// Variant order and field order matter: the derived ordering compares
/// variants in declaration order, then fields from top to bottom.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum EntryInfo {
    /// A directory.
    ///
    /// NOTE(review): the `Option<()>` fields look like presence flags for
    /// per-directory marker files — confirm against the code that fills them.
    Directory {
        hash_file: Option<()>,
        catalog_file: Option<()>,
        artist_file: Option<()>,
        release_file: Option<()>,
    },
    /// An audio file and its tag metadata; every tag field is optional.
    Music {
        /// Hash of the file, stored as text.
        hash: Arc<str>,
        /// Detected file type, when one is known.
        file_type: Option<&'static FileType>,
        artist: Option<Arc<str>>,
        album: Option<Arc<str>>,
        track: Option<u32>,
        title: Option<Arc<str>>,
        date: Option<Arc<str>>,
        year: Option<i32>,
        people: Option<Vec<Arc<str>>>,
        publisher: Option<Arc<str>>,
        key: Option<Arc<str>>,
        bpm: Option<Arc<str>>,
        // NOTE(review): exact meaning of `invalid` is not visible here — confirm.
        invalid: bool,
    },
    /// An image file.
    Image {
        /// Hash of the file, stored as text.
        hash: Arc<str>,
        /// Detected file type, when one is known.
        file_type: Option<&'static FileType>,
        title: Option<String>,
        author: Option<String>,
        // NOTE(review): exact meaning of `invalid` is not visible here — confirm.
        invalid: bool,
    },
    /// A file that matched none of the other kinds.
    Unknown {
        /// Hash of the file, stored as text.
        hash: Arc<str>,
        /// Detected file type, when one is known.
        file_type: Option<&'static FileType>,
    },
}
impl Taggart {
pub fn new (root: &impl AsRef<Path>) -> Usually<Self> {
Ok(Self {
@ -66,49 +109,6 @@ impl Taggart {
}
}
/// A scanned item: its path, its depth, and kind-specific information.
///
/// The derived ordering follows field order, so entries compare by `path`,
/// then by `depth`, then by `info`.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub struct Entry {
    /// Path of the item.
    pub path: PathBuf,
    /// Nesting depth of the item.
    pub depth: usize,
    /// Kind-specific details about the item.
    pub info: EntryInfo,
}
/// The kind of an entry plus the metadata recorded for that kind.
///
/// The derived ordering depends on declaration order: variants compare in the
/// order listed here, and within a variant fields compare from top to bottom.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum EntryInfo {
    /// A directory.
    ///
    /// NOTE(review): the `Option<()>` fields look like presence flags for
    /// per-directory marker files — confirm against the code that fills them.
    Directory {
        hash_file: Option<()>,
        catalog_file: Option<()>,
        artist_file: Option<()>,
        release_file: Option<()>,
    },
    /// An audio file and its tag metadata; every tag field is optional.
    Music {
        /// Hash of the file, stored as text.
        hash: Arc<str>,
        /// Detected file type (always present in this version of the type).
        file_type: &'static FileType,
        artist: Option<Arc<str>>,
        album: Option<Arc<str>>,
        track: Option<u32>,
        title: Option<Arc<str>>,
        date: Option<Arc<str>>,
        year: Option<i32>,
        people: Option<Vec<Arc<str>>>,
        publisher: Option<Arc<str>>,
        key: Option<Arc<str>>,
        bpm: Option<Arc<str>>,
        // NOTE(review): exact meaning of `invalid` is not visible here — confirm.
        invalid: bool,
    },
    /// An image file.
    Image {
        /// Hash of the file, stored as text.
        hash: Arc<str>,
        /// Detected file type (always present in this version of the type).
        file_type: &'static FileType,
        title: Option<String>,
        author: Option<String>,
        // NOTE(review): exact meaning of `invalid` is not visible here — confirm.
        invalid: bool,
    },
    /// A file that matched none of the other kinds.
    Unknown {
        /// Hash of the file, stored as text.
        hash: Arc<str>,
        /// Detected file type (always present in this version of the type).
        file_type: &'static FileType,
    },
}
impl Entry {
pub fn new (root: &impl AsRef<Path>, entry: &DirEntry) -> Perhaps<Self> {
println!("{}", entry.path().display());
@ -133,51 +133,10 @@ impl Entry {
}))
}
/// Builds the `Entry` for a regular file: the depth comes from `entry.depth()`
/// and the stored path is `entry.path()` with the `root` prefix stripped.
// NOTE(review): this span is a flattened diff view. It appears to interleave
// two versions of the body — one that picks the `EntryInfo` variant from the
// first media type reported by `FileType::try_from_reader`, and one that
// passes the file's bytes to `EntryInfo::new` — so it is not compilable as
// shown (two `Ok(Some(Self {` openers, duplicate `path:` and `info:` fields).
// Confirm against the committed file before changing any code here.
fn new_file (root: &impl AsRef<Path>, entry: &DirEntry) -> Perhaps<Self> {
let bytes = read(entry.path())?;
let hash = hex::encode(xxh3_64(&bytes).to_be_bytes());
let file_type = FileType::try_from_reader(&*bytes)?;
let mime_type = file_type.media_types().get(0);
return Ok(Some(Self {
Ok(Some(Self {
depth: entry.depth(),
path: entry.path().strip_prefix(root.as_ref())?.into(),
info: match mime_type {
Some(&"audio/mpeg3") => {
let id3 = Tag::read_from_path(entry.path())?;
EntryInfo::Music {
file_type,
hash: hash.into(),
artist: id3.artist().map(|x|x.into()),
album: id3.album().map(|x|x.into()),
track: id3.track().map(|x|x.into()),
title: id3.title().map(|x|x.into()),
date: None,
year: id3.year().map(|x|x.into()),
people: None,
publisher: None,
key: None,
bpm: None,
invalid: false,
}
},
Some(&"image/png") => EntryInfo::Image {
file_type,
hash: hash.into(),
title: None,
author: None,
invalid: false,
},
Some(&"image/jpeg") => EntryInfo::Image {
file_type,
hash: hash.into(),
title: None,
author: None,
invalid: false,
},
_ => EntryInfo::Unknown {
file_type,
hash: hash.into(),
}
},
path: entry.path().strip_prefix(root.as_ref())?.into(),
info: EntryInfo::new(&read(entry.path())?)?
}))
}
pub fn is_dir (&self) -> bool {
@ -221,3 +180,80 @@ impl Entry {
}
}
}
impl EntryInfo {
pub fn new (bytes: &[u8]) -> Usually<Self> {
// MP3 with ID3v2
if bytes.starts_with(&[b'I', b'D', b'3']) {
#[allow(deprecated)]
let id3 = Tag::read_from(bytes)?;
return Ok(Self::Music {
hash: hex::encode(xxh3_64(&bytes).to_be_bytes()).into(),
file_type: None,
artist: id3.artist().map(|x|x.into()),
album: id3.album().map(|x|x.into()),
track: id3.track().map(|x|x.into()),
title: id3.title().map(|x|x.into()),
year: id3.year().map(|x|x.into()),
date: None,
people: None,
publisher: None,
key: None,
bpm: None,
invalid: false,
})
}
// Ogg (Opus)
if bytes.starts_with(&[b'O', b'g', b'g', b'S']) {
let headers = opus_headers::parse_from_read(bytes)?;
println!("{headers:?}");
return Ok(Self::Music {
hash: hex::encode(xxh3_64(&bytes).to_be_bytes()).into(),
file_type: None,
artist: None,
album: None,
track: None,
title: None,
date: None,
year: None,
people: None,
publisher: None,
key: None,
bpm: None,
invalid: false,
})
}
// PNG
if bytes.starts_with(&[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A]) {
return Ok(Self::Image {
file_type: None,
hash: hex::encode(xxh3_64(&bytes).to_be_bytes()).into(),
title: None,
author: None,
invalid: false,
})
}
// JPG
if bytes.starts_with(&[0xFF, 0xD8, 0xFF, 0xDB])
|| bytes.starts_with(&[0xFF, 0xD8, 0xFF, 0xE0,
0x00, 0x10, 0x4A, 0x46,
0x49, 0x46, 0x00, 0x01])
|| bytes.starts_with(&[0xFF, 0xD8, 0xFF, 0xEE])
|| (bytes.starts_with(&[0xFF, 0xD8, 0xFF, 0xE1]) &&
bytes.get(6) == Some(&0x45) && bytes.get(7) == Some(&0x78) &&
bytes.get(8) == Some(&0x69) && bytes.get(9) == Some(&0x66) &&
bytes.get(10) == Some(&0x00) && bytes.get(11) == Some(&0x00))
{
return Ok(Self::Image {
file_type: None,
hash: hex::encode(xxh3_64(&bytes).to_be_bytes()).into(),
title: None,
author: None,
invalid: false,
})
}
Ok(Self::Unknown {
file_type: None,
hash: hex::encode(xxh3_64(&bytes).to_be_bytes()).into(),
})
}
}