Add JSON parser, use radix trie for string matching
This commit is contained in:
69
Cargo.lock
generated
69
Cargo.lock
generated
@@ -373,6 +373,12 @@ version = "1.15.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "endian-type"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "869b0adbda23651a9c5c0c3d270aac9fcb52e8622a8f2b17e57802d7791962f2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
@@ -617,6 +623,15 @@ dependencies = [
|
|||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nibble_vec"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43"
|
||||||
|
dependencies = [
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-traits"
|
name = "num-traits"
|
||||||
version = "0.2.19"
|
version = "0.2.19"
|
||||||
@@ -715,6 +730,16 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "radix_trie"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3b4431027dcd37fc2a73ef740b5f233aa805897935b8bce0195e41bbf9a3289a"
|
||||||
|
dependencies = [
|
||||||
|
"endian-type",
|
||||||
|
"nibble_vec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ratatui"
|
name = "ratatui"
|
||||||
version = "0.29.0"
|
version = "0.29.0"
|
||||||
@@ -795,6 +820,48 @@ version = "1.2.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.228"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||||
|
dependencies = [
|
||||||
|
"serde_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_core"
|
||||||
|
version = "1.0.228"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.228"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_json"
|
||||||
|
version = "1.0.145"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"memchr",
|
||||||
|
"ryu",
|
||||||
|
"serde",
|
||||||
|
"serde_core",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sharded-slab"
|
name = "sharded-slab"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
@@ -821,7 +888,9 @@ dependencies = [
|
|||||||
"crossterm 0.29.0",
|
"crossterm 0.29.0",
|
||||||
"eyre",
|
"eyre",
|
||||||
"ordered-float",
|
"ordered-float",
|
||||||
|
"radix_trie",
|
||||||
"ratatui",
|
"ratatui",
|
||||||
|
"serde_json",
|
||||||
"tracing",
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -18,5 +18,7 @@ color-eyre = "0.6.5"
|
|||||||
crossterm = "0.29.0"
|
crossterm = "0.29.0"
|
||||||
eyre = "0.6.12"
|
eyre = "0.6.12"
|
||||||
ordered-float = "5.0.0"
|
ordered-float = "5.0.0"
|
||||||
|
radix_trie = "0.3.0"
|
||||||
ratatui = "0.29.0"
|
ratatui = "0.29.0"
|
||||||
|
serde_json = "1.0.145"
|
||||||
tracing = "0.1.41"
|
tracing = "0.1.41"
|
||||||
|
|||||||
@@ -1,12 +1,26 @@
|
|||||||
use std::collections::{HashMap, HashSet, BTreeMap};
|
#![feature(btree_cursors)]
|
||||||
|
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::collections::{HashMap, BTreeSet, BTreeMap};
|
||||||
|
use std::ops::Bound;
|
||||||
|
use std::iter;
|
||||||
|
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
|
use radix_trie::{Trie, TrieCommon};
|
||||||
|
|
||||||
use crate::log_parser::{LogParser, ParseError};
|
use crate::log_parser::{LogParser};
|
||||||
|
|
||||||
pub type LogIdx = usize;
|
pub type LogIdx = usize;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct IngestError {
|
||||||
|
pub idx: LogIdx,
|
||||||
|
pub message: String,
|
||||||
|
pub column: u32,
|
||||||
|
}
|
||||||
|
|
||||||
pub enum LogValue {
|
pub enum LogValue {
|
||||||
|
Null,
|
||||||
String(String),
|
String(String),
|
||||||
Integer(i64),
|
Integer(i64),
|
||||||
Float(f64),
|
Float(f64),
|
||||||
@@ -33,24 +47,27 @@ pub struct LogDatabase<T: LogParser> {
|
|||||||
/// property names are always converted to strings
|
/// property names are always converted to strings
|
||||||
/// nested properties are additionally indexed with each parent property prefixes, using slash notation
|
/// nested properties are additionally indexed with each parent property prefixes, using slash notation
|
||||||
/// Example: JSON { a: { b: c: 1 } } is indexed with the properties "c", "b/c", and "a/b/c"
|
/// Example: JSON { a: { b: c: 1 } } is indexed with the properties "c", "b/c", and "a/b/c"
|
||||||
property_index: HashMap<String, HashSet<LogIdx>>,
|
property_index: HashMap<String, BTreeSet<LogIdx>>,
|
||||||
reverse_index: ReverseIndex,
|
reverse_index: ReverseIndex,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: LogParser> LogDatabase<T> {
|
impl<T: LogParser> LogDatabase<T> {
|
||||||
pub fn new(parser: T) -> LogDatabase<T> {
|
pub fn new(parser: T) -> Self {
|
||||||
LogDatabase {
|
Self {
|
||||||
parser,
|
parser,
|
||||||
|
|
||||||
logs_raw: Vec::new(),
|
logs_raw: Vec::new(),
|
||||||
property_index: HashMap::new(),
|
property_index: HashMap::new(),
|
||||||
reverse_index: ReverseIndex::new(),
|
reverse_index: ReverseIndex::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ingest(&mut self, line: String) -> Result<LogIdx, ParseError> {
|
pub fn ingest(&mut self, line: String) -> Result<LogIdx, IngestError> {
|
||||||
let log = self.parser.parse_line(&line)?;
|
|
||||||
let idx = self.logs_raw.len();
|
let idx = self.logs_raw.len();
|
||||||
|
let log = self.parser.parse_line(&line).map_err(|error| IngestError {
|
||||||
|
idx: idx,
|
||||||
|
message: error.message,
|
||||||
|
column: error.column,
|
||||||
|
})?;
|
||||||
for field in log.fields.iter() {
|
for field in log.fields.iter() {
|
||||||
self.index_field(idx, &field.property, &field.value);
|
self.index_field(idx, &field.property, &field.value);
|
||||||
}
|
}
|
||||||
@@ -68,7 +85,7 @@ impl<T: LogParser> LogDatabase<T> {
|
|||||||
idxs.insert(idx);
|
idxs.insert(idx);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
let mut idxs = HashSet::new();
|
let mut idxs = BTreeSet::new();
|
||||||
idxs.insert(idx);
|
idxs.insert(idx);
|
||||||
self.property_index.insert(property.clone(), idxs);
|
self.property_index.insert(property.clone(), idxs);
|
||||||
}
|
}
|
||||||
@@ -77,6 +94,9 @@ impl<T: LogParser> LogDatabase<T> {
|
|||||||
// add to reverse index
|
// add to reverse index
|
||||||
let field_location = FieldLocation { log_idx: idx, property: property.clone() };
|
let field_location = FieldLocation { log_idx: idx, property: property.clone() };
|
||||||
match value {
|
match value {
|
||||||
|
LogValue::Null => {
|
||||||
|
self.reverse_index.nulls.insert(field_location);
|
||||||
|
},
|
||||||
LogValue::String(value) => {
|
LogValue::String(value) => {
|
||||||
let entry = self.reverse_index.strings.get_mut(value);
|
let entry = self.reverse_index.strings.get_mut(value);
|
||||||
|
|
||||||
@@ -84,7 +104,7 @@ impl<T: LogParser> LogDatabase<T> {
|
|||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
let mut field_locations = HashSet::new();
|
let mut field_locations = BTreeSet::new();
|
||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
self.reverse_index.strings.insert(value.clone(), field_locations);
|
self.reverse_index.strings.insert(value.clone(), field_locations);
|
||||||
}
|
}
|
||||||
@@ -95,21 +115,32 @@ impl<T: LogParser> LogDatabase<T> {
|
|||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
let mut field_locations = HashSet::new();
|
let mut field_locations = BTreeSet::new();
|
||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
self.reverse_index.integers.insert(*value, field_locations);
|
self.reverse_index.integers.insert(*value, field_locations);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
LogValue::Float(value) => {
|
LogValue::Float(value) => {
|
||||||
let ordered = OrderedFloat(*value);
|
let ordered = OrderedFloat(*value);
|
||||||
let entry = self.reverse_index.floats.get_mut(&ordered);
|
if let Some(field_locations) = self.reverse_index.floats.get_mut(&ordered) {
|
||||||
if let Some(field_locations) = entry {
|
field_locations.insert(field_location.clone());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
let mut field_locations = BTreeSet::new();
|
||||||
|
field_locations.insert(field_location.clone());
|
||||||
|
self.reverse_index.floats.insert(ordered, field_locations);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// add to mantissa index
|
||||||
|
let bits = Bitsf64::from(*value);
|
||||||
|
if let Some(field_locations) = self.reverse_index.floats_mantissa.get_mut(&bits.mantissa) {
|
||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
let mut field_locations = HashSet::new();
|
let mut field_locations = BTreeSet::new();
|
||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
self.reverse_index.floats.insert(ordered, field_locations);
|
self.reverse_index.floats_mantissa.insert(bits.mantissa, field_locations);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
LogValue::Boolean(value) => {
|
LogValue::Boolean(value) => {
|
||||||
@@ -118,7 +149,7 @@ impl<T: LogParser> LogDatabase<T> {
|
|||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
let mut field_locations = HashSet::new();
|
let mut field_locations = BTreeSet::new();
|
||||||
field_locations.insert(field_location);
|
field_locations.insert(field_location);
|
||||||
self.reverse_index.booleans.insert(*value, field_locations);
|
self.reverse_index.booleans.insert(*value, field_locations);
|
||||||
}
|
}
|
||||||
@@ -138,33 +169,116 @@ impl<T: LogParser> LogDatabase<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: implemnt as generic for LogValue types??
|
pub fn find_value_str(&self, value: &str) -> impl Iterator<Item = &FieldLocation> {
|
||||||
pub fn find_value_string(&self, value: &str) -> impl Iterator<Item = &FieldLocation> {
|
|
||||||
self.reverse_index.strings.get(value).into_iter().flatten()
|
self.reverse_index.strings.get(value).into_iter().flatten()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn find_matching_str(&self, value: &str) -> impl Iterator<Item = &FieldLocation> {
|
||||||
|
// prefix matching
|
||||||
|
self.reverse_index.strings.subtrie(value).into_iter()
|
||||||
|
.flat_map(|subtrie| { subtrie.iter() })
|
||||||
|
.flat_map(|(_, locations)| {
|
||||||
|
locations.iter()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_value_i64(&self, value: i64) -> impl Iterator<Item = &FieldLocation> {
|
||||||
|
self.reverse_index.integers.get(&value).into_iter().flatten()
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1, 1.2, 1.5, 1.8, 2, 2.5, 12, 125
|
||||||
|
|
||||||
|
// 1
|
||||||
|
// 1, 1.2, 1.5, 1.8, 12, 125
|
||||||
|
|
||||||
|
// 2
|
||||||
|
// 2, 2.5
|
||||||
|
|
||||||
|
// pub fn find_value_f64_mantissa(&self, value: f64) -> impl Iterator<Item = &FieldLocation> {
|
||||||
|
// let bits = Bitsf64::from(value);
|
||||||
|
// let base10_digits = bits.mantissa
|
||||||
|
// let range = self.reverse_index.floats_mantissa.range((Bound::Included(bits.mantissa), Bound::Excluded(bits.mantissa + )));
|
||||||
|
// cursor.iter().flatten()
|
||||||
|
// }
|
||||||
|
|
||||||
|
pub fn find_value_bool(&self, value: bool) -> impl Iterator<Item = &FieldLocation> {
|
||||||
|
self.reverse_index.booleans.get(&value).into_iter().flatten()
|
||||||
|
}
|
||||||
|
pub fn find_value_null(&self) -> impl Iterator<Item = &FieldLocation> {
|
||||||
|
self.reverse_index.nulls.iter()
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ReverseIndex {
|
struct ReverseIndex {
|
||||||
strings: HashMap<String, HashSet<FieldLocation>>,
|
strings: Trie<String, BTreeSet<FieldLocation>>,
|
||||||
integers: BTreeMap<i64, HashSet<FieldLocation>>,
|
integers: BTreeMap<i64, BTreeSet<FieldLocation>>,
|
||||||
floats: BTreeMap<OrderedFloat<f64>, HashSet<FieldLocation>>,
|
floats: BTreeMap<OrderedFloat<f64>, BTreeSet<FieldLocation>>,
|
||||||
booleans: HashMap<bool, HashSet<FieldLocation>>,
|
floats_mantissa: BTreeMap<u64, BTreeSet<FieldLocation>>,
|
||||||
|
booleans: HashMap<bool, BTreeSet<FieldLocation>>,
|
||||||
|
nulls: BTreeSet<FieldLocation>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Hash, Debug)]
|
#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Clone, Debug)]
|
||||||
pub struct FieldLocation {
|
pub struct FieldLocation {
|
||||||
log_idx: LogIdx,
|
pub log_idx: LogIdx,
|
||||||
property: Option<String>,
|
pub property: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
impl ReverseIndex {
|
impl ReverseIndex {
|
||||||
fn new() -> ReverseIndex {
|
fn new() -> Self {
|
||||||
ReverseIndex {
|
Self {
|
||||||
strings: HashMap::new(),
|
strings: Trie::new(),
|
||||||
integers: BTreeMap::new(),
|
integers: BTreeMap::new(),
|
||||||
floats: BTreeMap::new(),
|
floats: BTreeMap::new(),
|
||||||
|
floats_mantissa: BTreeMap::new(),
|
||||||
booleans: HashMap::new(),
|
booleans: HashMap::new(),
|
||||||
|
nulls: BTreeSet::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// float representations
|
||||||
|
|
||||||
|
const F64_EXPONENT_BITS: u32 = 11;
|
||||||
|
const F64_MANTISSA_BITS: u32 = 52;
|
||||||
|
|
||||||
|
struct Bitsf64 {
|
||||||
|
pub sign: bool,
|
||||||
|
pub exponent: u32,
|
||||||
|
pub mantissa: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Bitsf64 {
|
||||||
|
fn from(f: f64) -> Self {
|
||||||
|
let bits = f.to_bits();
|
||||||
|
Self {
|
||||||
|
sign: ((bits >> 63) & 1) == 1,
|
||||||
|
exponent: ((bits >> F64_MANTISSA_BITS) & ((1<<F64_EXPONENT_BITS)-1)) as u32,
|
||||||
|
mantissa: bits & ((1<<F64_MANTISSA_BITS)-1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct f64MantissaOrd(pub Bitsf64);
|
||||||
|
|
||||||
|
impl Ord for f64MantissaOrd {
|
||||||
|
fn cmp(&self, other: &Self) -> Ordering {
|
||||||
|
(self.0.mantissa, self.0.exponent, self.0.sign).cmp(&(other.0.mantissa, other.0.exponent, other.0.sign))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for f64MantissaOrd {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq for f64MantissaOrd {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
(self.0.mantissa, self.0.exponent, self.0.sign) == (other.0.mantissa, other.0.exponent, other.0.sign)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Eq for f64MantissaOrd { }
|
||||||
|
|
||||||
|
|||||||
@@ -1,56 +1,21 @@
|
|||||||
use crate::log_database::{Log, LogField, LogValue};
|
mod text_parser;
|
||||||
|
pub use text_parser::TextParser;
|
||||||
|
|
||||||
|
mod json_parser;
|
||||||
|
pub use json_parser::JsonParser;
|
||||||
|
|
||||||
|
use crate::log_database::{Log};
|
||||||
|
|
||||||
|
// Parses input into a string format
|
||||||
|
/// a Log consists of multiple keywords, which are indexed into the keyword database
|
||||||
|
/// each keyword has a data type, as well as text positional textual information for syntax highlighting
|
||||||
|
/// data can also be structured, so keywords need to know their position in the data structure for targetted queries
|
||||||
pub trait LogParser {
|
pub trait LogParser {
|
||||||
fn parse_line(&self, line: &str) -> Result<Log, ParseError>;
|
fn parse_line(&self, line: &str) -> Result<Log, ParseError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ParseError {
|
pub struct ParseError {
|
||||||
pub message: &'static str,
|
pub message: String,
|
||||||
pub col: u32,
|
pub column: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// a Log consists of multiple keywords, which are indexed into the keyword database
|
|
||||||
// each keyword has a data type, as well as text positional textual information for syntax highlighting
|
|
||||||
// data can also be structured, so keywords need to know their position in the data structure for targetted queries
|
|
||||||
|
|
||||||
pub struct TextParser<'a> {
|
|
||||||
pub separator: Option<&'a str>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses unstructured text word by word
|
|
||||||
impl<'a> LogParser for TextParser<'a> {
|
|
||||||
fn parse_line(&self, line: &str) -> Result<Log, ParseError> {
|
|
||||||
let mut split_iter;
|
|
||||||
let mut split_whitespace_iter;
|
|
||||||
|
|
||||||
let words: &mut dyn Iterator<Item = &str> = match self.separator {
|
|
||||||
Some(separator) => {
|
|
||||||
split_iter = line.split(separator);
|
|
||||||
&mut split_iter
|
|
||||||
},
|
|
||||||
None => {
|
|
||||||
split_whitespace_iter = line.split_whitespace();
|
|
||||||
&mut split_whitespace_iter
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
let fields: Vec<LogField> = words
|
|
||||||
.map(|word| LogField {
|
|
||||||
property: None,
|
|
||||||
value: LogValue::String(String::from(word))
|
|
||||||
}).collect();
|
|
||||||
|
|
||||||
Ok(Log {fields})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pub struct JsonParser {
|
|
||||||
// }
|
|
||||||
|
|
||||||
// impl LogParser for JsonParser {
|
|
||||||
// fn parse_line(&self, line: &str) -> Result<Log, ParseError> {
|
|
||||||
// Ok(Log {})
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|||||||
80
src/log_parser/json_parser.rs
Normal file
80
src/log_parser/json_parser.rs
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use serde_json;
|
||||||
|
|
||||||
|
use super::{LogParser, ParseError};
|
||||||
|
use crate::log_database::{Log, LogField, LogValue};
|
||||||
|
|
||||||
|
pub struct JsonParser {
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses single-line JSON
|
||||||
|
impl LogParser for JsonParser {
|
||||||
|
fn parse_line(&self, line: &str) -> Result<Log, ParseError> {
|
||||||
|
let json_value: serde_json::Value = match serde_json::from_str(line) {
|
||||||
|
Ok(value) => value,
|
||||||
|
Err(error) => return Err(ParseError {
|
||||||
|
message: error.to_string(),
|
||||||
|
column: u32::try_from(error.column()).expect("column to fit in u32"),
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
let log_value = serde_value_to_log_value(json_value)?;
|
||||||
|
let fields: Vec<LogField> = match log_value {
|
||||||
|
LogValue::Null
|
||||||
|
| LogValue::String(_)
|
||||||
|
| LogValue::Integer(_)
|
||||||
|
| LogValue::Float(_)
|
||||||
|
| LogValue::Boolean(_) => vec!(LogField {
|
||||||
|
property: None,
|
||||||
|
value: log_value,
|
||||||
|
}),
|
||||||
|
LogValue::Array(array) => array.into_iter().map(|v| LogField {
|
||||||
|
property: None,
|
||||||
|
value: v,
|
||||||
|
}).collect(),
|
||||||
|
LogValue::Map(map) => map.into_iter().map(|(k, v)| LogField {
|
||||||
|
property: Some(k),
|
||||||
|
value: v,
|
||||||
|
}).collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Log {fields})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serde_value_to_log_value(json_value: serde_json::Value) -> Result<LogValue, ParseError> {
|
||||||
|
let value = match json_value {
|
||||||
|
serde_json::Value::String(value) => LogValue::String(value),
|
||||||
|
serde_json::Value::Null => LogValue::Null,
|
||||||
|
serde_json::Value::Number(value) => {
|
||||||
|
if let Some(as_int) = value.as_i64() { LogValue::Integer(as_int) }
|
||||||
|
else if let Some(as_float) = value.as_f64() { LogValue::Float(as_float) }
|
||||||
|
else { return Err(ParseError {
|
||||||
|
message: format!("Failed to parse number: {}", value.to_string()),
|
||||||
|
column: 0,
|
||||||
|
// TODO: update with serde_json spanned values
|
||||||
|
// https://github.com/serde-rs/json/issues/637
|
||||||
|
}) }
|
||||||
|
},
|
||||||
|
serde_json::Value::Bool(value) => LogValue::Boolean(value),
|
||||||
|
serde_json::Value::Array(values) => {
|
||||||
|
let mut array: Box<Vec<LogValue>> = Box::new(Vec::new());
|
||||||
|
for value in values {
|
||||||
|
array.push(serde_value_to_log_value(value)?);
|
||||||
|
}
|
||||||
|
|
||||||
|
LogValue::Array(array)
|
||||||
|
},
|
||||||
|
serde_json::Value::Object(properties) => {
|
||||||
|
let mut map: Box<HashMap<String, LogValue>> = Box::new(HashMap::new());
|
||||||
|
for (key, value) in properties {
|
||||||
|
map.insert(key, serde_value_to_log_value(value)?);
|
||||||
|
}
|
||||||
|
|
||||||
|
LogValue::Map(map)
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(value);
|
||||||
|
}
|
||||||
33
src/log_parser/text_parser.rs
Normal file
33
src/log_parser/text_parser.rs
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
use super::{LogParser, ParseError};
|
||||||
|
use crate::log_database::{Log, LogField, LogValue};
|
||||||
|
|
||||||
|
pub struct TextParser<'a> {
|
||||||
|
pub separator: Option<&'a str>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses unstructured text word by word
|
||||||
|
impl LogParser for TextParser<'_> {
|
||||||
|
fn parse_line(&self, line: &str) -> Result<Log, ParseError> {
|
||||||
|
let mut split_iter;
|
||||||
|
let mut split_whitespace_iter;
|
||||||
|
|
||||||
|
let words: &mut dyn Iterator<Item = &str> = match self.separator {
|
||||||
|
Some(separator) => {
|
||||||
|
split_iter = line.split(separator);
|
||||||
|
&mut split_iter
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
split_whitespace_iter = line.split_whitespace();
|
||||||
|
&mut split_whitespace_iter
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let fields: Vec<LogField> = words
|
||||||
|
.map(|word| LogField {
|
||||||
|
property: None,
|
||||||
|
value: LogValue::String(String::from(word))
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
Ok(Log {fields})
|
||||||
|
}
|
||||||
|
}
|
||||||
33
src/main.rs
33
src/main.rs
@@ -2,7 +2,7 @@
|
|||||||
// use atty::Stream;
|
// use atty::Stream;
|
||||||
|
|
||||||
use sift::log_database::LogDatabase;
|
use sift::log_database::LogDatabase;
|
||||||
use sift::log_parser::TextParser;
|
use sift::log_parser::{TextParser, JsonParser};
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
// if atty::is(Stream::Stdin) {
|
// if atty::is(Stream::Stdin) {
|
||||||
@@ -16,13 +16,28 @@ fn main() {
|
|||||||
// println!("{}", line);
|
// println!("{}", line);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
let mut log_database = LogDatabase::new(TextParser { separator: None });
|
// let mut log_database = LogDatabase::new(TextParser { separator: None });
|
||||||
log_database.ingest(String::from("hello world")).unwrap();
|
// log_database.ingest(String::from("hello world")).unwrap();
|
||||||
log_database.ingest(String::from("have a good hello")).unwrap();
|
// log_database.ingest(String::from("have a good hello")).unwrap();
|
||||||
log_database.ingest(String::from("goodbye world")).unwrap();
|
// log_database.ingest(String::from("goodbye world")).unwrap();
|
||||||
|
|
||||||
println!("hello {:?}", log_database.find_value_string("hello").collect::<Vec<_>>());
|
// println!("hello {:?}", log_database.find_value_string("hello").collect::<Vec<_>>());
|
||||||
println!("have {:?}", log_database.find_value_string("have").collect::<Vec<_>>());
|
// println!("have {:?}", log_database.find_value_string("have").collect::<Vec<_>>());
|
||||||
println!("world {:?}", log_database.find_value_string("world").collect::<Vec<_>>());
|
// println!("world {:?}", log_database.find_value_string("world").collect::<Vec<_>>());
|
||||||
println!("elliot {:?}", log_database.find_value_string("elliot").collect::<Vec<_>>());
|
// println!("elliot {:?}", log_database.find_value_string("elliot").collect::<Vec<_>>());
|
||||||
|
|
||||||
|
let mut log_database = LogDatabase::new(JsonParser {});
|
||||||
|
log_database.ingest(String::from("null")).unwrap();
|
||||||
|
log_database.ingest(String::from("\"hello\"")).unwrap();
|
||||||
|
log_database.ingest(String::from("42")).unwrap();
|
||||||
|
log_database.ingest(String::from("3.14")).unwrap();
|
||||||
|
log_database.ingest(String::from("true")).unwrap();
|
||||||
|
log_database.ingest(String::from("{\"greeting\": \"hello\", \"name\": \"world\"}")).unwrap();
|
||||||
|
log_database.ingest(String::from("{\"greeting\": \"hello\", \"name\": \"elliot\"}")).unwrap();
|
||||||
|
|
||||||
|
println!("hello {:?}", log_database.find_value_str("hello").collect::<Vec<_>>());
|
||||||
|
println!("world {:?}", log_database.find_value_str("world").collect::<Vec<_>>());
|
||||||
|
|
||||||
|
println!("matching");
|
||||||
|
println!("hello {:?}", log_database.find_matching_str("hello").collect::<Vec<_>>());
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user