1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
|
module JSONFilter
alias BracketIndex = Hash(Int64, Int64)
alias GroupedFieldsValue = String | Array(GroupedFieldsValue)
alias GroupedFieldsList = Array(GroupedFieldsValue)
class FieldsParser
class ParseError < Exception
end
# Returns the `Regex` pattern used to match nest groups
def self.nest_group_pattern : Regex
# uses a '.' character to match json keys as they are allowed
# to contain any unicode codepoint
/(?:|,)(?<groupname>[^,\n]*?)\(/
end
# Returns the `Regex` pattern used to check if there are any empty nest groups
def self.unnamed_nest_group_pattern : Regex
/^\(|\(\(|\/\(/
end
def self.parse_fields(fields_text : String) : Nil
if fields_text.empty?
raise FieldsParser::ParseError.new "Fields is empty"
end
opening_bracket_count = fields_text.count('(')
closing_bracket_count = fields_text.count(')')
if opening_bracket_count != closing_bracket_count
bracket_type = opening_bracket_count > closing_bracket_count ? "opening" : "closing"
raise FieldsParser::ParseError.new "There are too many #{bracket_type} brackets (#{opening_bracket_count}:#{closing_bracket_count})"
elsif match_result = unnamed_nest_group_pattern.match(fields_text)
raise FieldsParser::ParseError.new "Unnamed nest group at position #{match_result.begin}"
end
# first, handle top-level single nested properties: items/id, playlistItems/snippet, etc
parse_single_nests(fields_text) { |nest_list| yield nest_list }
# next, handle nest groups: items(id, etag, etc)
parse_nest_groups(fields_text) { |nest_list| yield nest_list }
end
def self.parse_single_nests(fields_text : String) : Nil
single_nests = remove_nest_groups(fields_text)
if !single_nests.empty?
property_nests = single_nests.split(',')
property_nests.each do |nest|
nest_list = nest.split('/')
if nest_list.includes? ""
raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list}"
end
yield nest_list
end
# else
# raise FieldsParser::ParseError.new "Empty key in nest list 22: #{fields_text} | #{single_nests}"
end
end
def self.parse_nest_groups(fields_text : String) : Nil
nest_stack = [] of NamedTuple(group_name: String, closing_bracket_index: Int64)
bracket_pairs = get_bracket_pairs(fields_text, true)
text_index = 0
regex_index = 0
while regex_result = self.nest_group_pattern.match(fields_text, regex_index)
raw_match = regex_result[0]
group_name = regex_result["groupname"]
text_index = regex_result.begin
regex_index = regex_result.end
if text_index.nil? || regex_index.nil?
raise FieldsParser::ParseError.new "Received invalid index while parsing nest groups: text_index: #{text_index} | regex_index: #{regex_index}"
end
offset = raw_match.starts_with?(',') ? 1 : 0
opening_bracket_index = (text_index + group_name.size) + offset
closing_bracket_index = bracket_pairs[opening_bracket_index]
content_start = opening_bracket_index + 1
content = fields_text[content_start...closing_bracket_index]
if content.empty?
raise FieldsParser::ParseError.new "Empty nest group at position #{content_start}"
else
content = remove_nest_groups(content)
end
while nest_stack.size > 0 && closing_bracket_index > nest_stack[nest_stack.size - 1][:closing_bracket_index]
if nest_stack.size
nest_stack.pop
end
end
group_name.split('/').each do |name|
nest_stack.push({
group_name: name,
closing_bracket_index: closing_bracket_index,
})
end
if !content.empty?
properties = content.split(',')
properties.each do |prop|
nest_list = nest_stack.map { |nest_prop| nest_prop[:group_name] }
if !prop.empty?
if prop.includes?('/')
parse_single_nests(prop) { |list| nest_list += list }
else
nest_list.push prop
end
else
raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list << prop}"
end
yield nest_list
end
end
end
end
def self.remove_nest_groups(text : String) : String
content_bracket_pairs = get_bracket_pairs(text, false)
content_bracket_pairs.each_key.to_a.reverse.each do |opening_bracket|
closing_bracket = content_bracket_pairs[opening_bracket]
last_comma = text.rindex(',', opening_bracket) || 0
text = text[0...last_comma] + text[closing_bracket + 1...text.size]
end
return text.starts_with?(',') ? text[1...text.size] : text
end
def self.get_bracket_pairs(text : String, recursive = true) : BracketIndex
istart = [] of Int64
bracket_index = BracketIndex.new
text.each_char_with_index do |char, index|
if char == '('
istart.push(index.to_i64)
end
if char == ')'
begin
opening = istart.pop
if recursive || (!recursive && istart.size == 0)
bracket_index[opening] = index.to_i64
end
rescue
raise FieldsParser::ParseError.new "No matching opening parenthesis at: #{index}"
end
end
end
if istart.size != 0
idx = istart.pop
raise FieldsParser::ParseError.new "No matching closing parenthesis at: #{idx}"
end
return bracket_index
end
end
class FieldsGrouper
alias SkeletonValue = Hash(String, SkeletonValue)
def self.create_json_skeleton(fields_text : String) : SkeletonValue
root_hash = {} of String => SkeletonValue
FieldsParser.parse_fields(fields_text) do |nest_list|
current_item = root_hash
nest_list.each do |key|
if current_item[key]?
current_item = current_item[key]
else
current_item[key] = {} of String => SkeletonValue
current_item = current_item[key]
end
end
end
root_hash
end
def self.create_grouped_fields_list(json_skeleton : SkeletonValue) : GroupedFieldsList
grouped_fields_list = GroupedFieldsList.new
json_skeleton.each do |key, value|
grouped_fields_list.push key
nested_keys = create_grouped_fields_list(value)
grouped_fields_list.push nested_keys unless nested_keys.empty?
end
return grouped_fields_list
end
end
class FilterError < Exception
end
def self.filter(item : JSON::Any, fields_text : String, in_place : Bool = true)
skeleton = FieldsGrouper.create_json_skeleton(fields_text)
grouped_fields_list = FieldsGrouper.create_grouped_fields_list(skeleton)
filter(item, grouped_fields_list, in_place)
end
def self.filter(item : JSON::Any, grouped_fields_list : GroupedFieldsList, in_place : Bool = true) : JSON::Any
item = item.clone unless in_place
if !item.as_h? && !item.as_a?
raise FilterError.new "Can't filter '#{item}' by #{grouped_fields_list}"
end
top_level_keys = Array(String).new
grouped_fields_list.each do |value|
if value.is_a? String
top_level_keys.push value
elsif value.is_a? Array
if !top_level_keys.empty?
key_to_filter = top_level_keys.last
if item.as_h?
filter(item[key_to_filter], value, in_place: true)
elsif item.as_a?
item.as_a.each { |arr_item| filter(arr_item[key_to_filter], value, in_place: true) }
end
else
raise FilterError.new "Tried to filter while top level keys list is empty"
end
end
end
if item.as_h?
item.as_h.select! top_level_keys
elsif item.as_a?
item.as_a.map { |value| filter(value, top_level_keys, in_place: true) }
end
item
end
end
|