git.ipfire.org Git - thirdparty/google/fonts.git/commitdiff
Rust crate (#158)
author Simon Cozens <simon@simon-cozens.org>
Fri, 8 Nov 2024 15:52:12 +0000 (15:52 +0000)
committer GitHub <noreply@github.com>
Fri, 8 Nov 2024 15:52:12 +0000 (15:52 +0000)
* Initial commit

* Reduce chunk size to 100, to avoid pathological compile times

* rustfmt

* Loosen deps

* Tests, kind of

* Rename crate

* Use serde_json to store the data

Cargo.toml [new file with mode: 0644]
build.rs [new file with mode: 0644]
src/lib.rs [new file with mode: 0644]

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644 (file)
index 0000000..90cf6dd
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "google-fonts-languages"
+version = "0.6.3"
+edition = "2021"
+
+[dependencies]
+bytes = "1.7.1"
+prost = "0.13"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+
+[build-dependencies]
+prost-build = "0.13"
+protobuf-support = { git = "https://github.com/cmyr/rust-protobuf", branch = "parse-unicode-strings" }
+protobuf = { git = "https://github.com/cmyr/rust-protobuf", branch = "parse-unicode-strings" }
+protobuf-parse = { git = "https://github.com/cmyr/rust-protobuf", branch = "parse-unicode-strings" }
+glob = "*" # A glob for "glob" (intentional joke); note that crates.io rejects bare "*" version requirements.
+prettyplease = "0.2"
+quote = "1.0"
+proc-macro2 = "1.0"
+syn = "2.0"
+itertools = "0.13"
+serde_json = "1.0"
diff --git a/build.rs b/build.rs
new file mode 100644 (file)
index 0000000..c5e4c45
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,168 @@
+use proc_macro2::TokenStream;
+use protobuf::reflect::{FieldDescriptor, ReflectValueRef};
+use quote::quote;
+use serde_json::Map;
+use std::io::{BufWriter, Write};
+use std::{env, fs::File, path::Path};
+
+fn main() {
+    // First we load up the descriptor using the protobuf crate
+    // so that we can do reflection on it.
+    let descriptors = protobuf_parse::Parser::new()
+        .pure()
+        .include(".")
+        .input("Lib/gflanguages/languages_public.proto")
+        .file_descriptor_set()
+        .expect("Could not parse languages_public.proto");
+    let protofile = descriptors.file.first().expect("No file in descriptor");
+    let descriptor = protobuf::reflect::FileDescriptor::new_dynamic(protofile.clone(), &[])
+        .expect("Could not create descriptor");
+
+    // Now we use the prost crate to compile them, so that we can
+    // generate Rust structs.
+    let mut config = prost_build::Config::new();
+    // config.boxed(".google.languages_public.LanguageProto.sample_text");
+    // config.boxed(".google.languages_public.LanguageProto.exemplar_chars");
+
+    // The reflection can tell us what messages we have, so we can configure
+    // them to be deserializable with serde
+    for message in descriptor.messages() {
+        config.type_attribute(
+            message.full_name(),
+            "#[derive(serde::Serialize, serde::Deserialize)]",
+        );
+    }
+    // Let's make our structs; this produces google.languages_public.rs
+    config
+        .compile_protos(
+            &["Lib/gflanguages/languages_public.proto"],
+            &["Lib/gflanguages/"],
+        )
+        .expect("Could not compile languages_public.proto");
+
+    let path = Path::new(&env::var("OUT_DIR").unwrap()).join("data.rs");
+    let mut file = BufWriter::new(File::create(path).unwrap());
+    let mut output = quote! { use std::collections::BTreeMap; use std::sync::LazyLock; };
+
+    output.extend(serialize_a_structure(
+        ".google.languages_public.RegionProto",
+        "Lib/gflanguages/data/regions/*.textproto",
+        "REGIONS",
+        &descriptor,
+    ));
+
+    output.extend(serialize_a_structure(
+        ".google.languages_public.ScriptProto",
+        "Lib/gflanguages/data/scripts/*.textproto",
+        "SCRIPTS",
+        &descriptor,
+    ));
+
+    output.extend(serialize_a_structure(
+        ".google.languages_public.LanguageProto",
+        "Lib/gflanguages/data/languages/*.textproto",
+        "LANGUAGES",
+        &descriptor,
+    ));
+    // file.write_all(output.to_string().as_bytes())
+    //     .expect("Could not write to file");
+
+    let abstract_file: syn::File = syn::parse2(output).expect("Could not parse output");
+    let formatted = prettyplease::unparse(&abstract_file);
+    file.write_all(formatted.as_bytes())
+        .expect("Could not write to file");
+}
+
+fn serialize_a_structure(
+    proto_name: &str,
+    pathglob: &str,
+    output_variable: &str,
+    descriptor: &protobuf::reflect::FileDescriptor,
+) -> TokenStream {
+    let proto = descriptor
+        .message_by_full_name(proto_name)
+        .unwrap_or_else(|| panic!("No {} message", proto_name));
+    let files: Vec<std::path::PathBuf> = glob::glob(pathglob)
+        .expect("Failed to read glob pattern")
+        .flatten()
+        .collect();
+    let name: TokenStream = proto.name().parse().unwrap();
+    let variable: TokenStream = output_variable.parse().unwrap();
+    let mut map = Map::new();
+    for file in files.into_iter() {
+        serialize_file(file, &proto, &mut map);
+    }
+    let json_var: TokenStream = format!("__{}", output_variable).parse().unwrap();
+    let docmsg = format!("A map of all the {} objects", name);
+    let json_dump = serde_json::to_string(&map).expect("Could not serialize");
+    quote! {
+        static #json_var: &str = #json_dump;
+
+        #[doc = #docmsg]
+        pub static #variable: LazyLock<BTreeMap<String, Box<#name>>> = LazyLock::new(|| {
+            serde_json::from_str(#json_var).expect("Could not deserialize")
+        });
+    }
+}
+fn serialize_file(
+    path: std::path::PathBuf,
+    descriptor: &protobuf::reflect::MessageDescriptor,
+    value: &mut Map<String, serde_json::Value>,
+) {
+    let mut message = descriptor.new_instance();
+    let message_mut = message.as_mut();
+    let input = std::fs::read_to_string(&path).expect("Could not read file");
+    protobuf::text_format::merge_from_str(message_mut, &input)
+        .unwrap_or_else(|e| panic!("Could not parse file {:?}: {:?}", path, e));
+    let id = path.file_stem().unwrap().to_str().unwrap();
+    value.insert(id.to_string(), serialize_message(message_mut));
+}
+
+fn serialize_message(message: &dyn protobuf::MessageDyn) -> serde_json::Value {
+    let descriptor = message.descriptor_dyn();
+    // let descriptor_name: TokenStream = descriptor.name().parse().unwrap();
+    let mut output = Map::new();
+    for field in descriptor.fields() {
+        let field_name: TokenStream = field.name().parse().unwrap();
+        let field_contents = serialize_field(&field, message);
+        output.insert(field_name.to_string(), field_contents);
+    }
+    output.into()
+}
+
+fn serialize_field(
+    field: &FieldDescriptor,
+    message: &dyn protobuf::MessageDyn,
+) -> serde_json::Value {
+    if field.is_repeated() {
+        let v: Vec<serde_json::Value> = field
+            .get_repeated(message)
+            .into_iter()
+            .map(|value| serialize_field_value(field, value))
+            .collect();
+        v.into()
+    } else if field.is_required() {
+        serialize_field_value(field, field.get_singular(message).unwrap())
+    } else if field.has_field(message) {
+        let value = serialize_field_value(field, field.get_singular(message).unwrap());
+        value.into()
+    } else {
+        serde_json::Value::Null
+    }
+}
+
+fn serialize_field_value(_field: &FieldDescriptor, value: ReflectValueRef) -> serde_json::Value {
+    match value {
+        ReflectValueRef::Bool(value) => value.into(),
+        ReflectValueRef::I32(value) => value.into(),
+        ReflectValueRef::I64(value) => value.into(),
+        ReflectValueRef::U32(value) => value.into(),
+        ReflectValueRef::U64(value) => value.into(),
+        ReflectValueRef::F32(value) => value.into(),
+        ReflectValueRef::F64(value) => value.into(),
+        ReflectValueRef::String(value) => value.into(),
+        ReflectValueRef::Bytes(value) => value.into(),
+        ReflectValueRef::Enum(_value, _ix) => unimplemented!(),
+        ReflectValueRef::Message(value) => serialize_message(&*value),
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644 (file)
index 0000000..97a4c6d
--- /dev/null
@@ -0,0 +1,25 @@
+include!(concat!(env!("OUT_DIR"), "/google.languages_public.rs"));
+include!(concat!(env!("OUT_DIR"), "/data.rs"));
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn regions() {
+        assert!((*REGIONS).contains_key("BG"));
+        assert_eq!(REGIONS.get("BG").unwrap().name.as_deref(), Some("Bulgaria"));
+    }
+
+    #[test]
+    fn scripts() {
+        assert!((*SCRIPTS).contains_key("Arab"));
+        assert_eq!(SCRIPTS.get("Arab").unwrap().name.as_deref(), Some("Arabic"));
+    }
+
+    #[test]
+    fn languages() {
+        assert!(LANGUAGES.len() > 1000);
+        assert!((*LANGUAGES).contains_key("ar_Arab"));
+    }
+}