summaryrefslogtreecommitdiff
path: root/src/invidious/helpers/json_filter.cr
blob: b8e8f96d05c0d2646664e51283afc88415791b27 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
module JSONFilter
  alias BracketIndex = Hash(Int64, Int64)

  alias GroupedFieldsValue = String | Array(GroupedFieldsValue)
  alias GroupedFieldsList = Array(GroupedFieldsValue)

  class FieldsParser
    # Raised when a fields expression cannot be parsed: empty input,
    # unbalanced brackets, unnamed or empty nest groups, or empty keys.
    class ParseError < Exception
    end

    # Returns the `Regex` pattern used to match nest groups.
    #
    # A match consists of an optional leading comma, a lazily matched
    # `groupname` capture (any run of characters other than `,` and newline,
    # possibly empty) and the opening `(` of the group.
    def self.nest_group_pattern : Regex
      # The group name is matched with a negated character class (anything
      # except ',' and newline) rather than a fixed set of identifier
      # characters, as json keys are allowed to contain any unicode codepoint
      /(?:|,)(?<groupname>[^,\n]*?)\(/
    end

    # Returns the `Regex` pattern used to check if there are any unnamed nest
    # groups, i.e. an opening bracket with no group name in front of it: at the
    # start (`^\(`), right after another opening bracket (`\(\(`), right after
    # a path separator (`\/\(`) or right after a comma (`,\(`).
    def self.unnamed_nest_group_pattern : Regex
      # `,\(` has to be rejected as well: `nest_group_pattern` accepts an empty
      # group name, so `a,(b)` would otherwise be parsed as a group named "".
      /^\(|\(\(|\/\(|,\(/
    end

    # Validates *fields_text* and yields every key path it describes, each as
    # a list of keys: first the paths written outside of any nest group
    # (`items/id`), then the paths produced by nest groups (`items(id,etag)`).
    #
    # Raises `FieldsParser::ParseError` when the text is empty, when the
    # numbers of opening and closing brackets differ, or when an unnamed nest
    # group is found.
    def self.parse_fields(fields_text : String) : Nil
      raise FieldsParser::ParseError.new "Fields is empty" if fields_text.empty?

      opened = fields_text.count('(')
      closed = fields_text.count(')')

      unless opened == closed
        surplus = opened > closed ? "opening" : "closing"
        raise FieldsParser::ParseError.new "There are too many #{surplus} brackets (#{opened}:#{closed})"
      end

      if unnamed_group = unnamed_nest_group_pattern.match(fields_text)
        raise FieldsParser::ParseError.new "Unnamed nest group at position #{unnamed_group.begin}"
      end

      # Top-level single nested properties come first: items/id, playlistItems/snippet, etc
      parse_single_nests(fields_text) { |nest_list| yield nest_list }

      # Nest groups follow: items(id, etag, etc)
      parse_nest_groups(fields_text) { |nest_list| yield nest_list }
    end

    # Yields the key path of every comma-separated property of *fields_text*
    # that is not part of a nest group; `a/b/c` is yielded as `["a", "b", "c"]`.
    #
    # Nothing is yielded when no text remains once the nest groups have been
    # removed. Raises `FieldsParser::ParseError` when a path contains an empty
    # key.
    def self.parse_single_nests(fields_text : String) : Nil
      remaining = remove_nest_groups(fields_text)
      return if remaining.empty?

      remaining.split(',').each do |path|
        keys = path.split('/')
        if keys.any?(&.empty?)
          raise FieldsParser::ParseError.new "Empty key in nest list: #{keys}"
        end
        yield keys
      end
    end

    # Walks every nest group (`name(...)`) found in *fields_text* and yields
    # one key path per property listed directly inside a group.
    #
    # A path is made of the names of all groups enclosing the property (a group
    # named `a/b` contributes one entry per `/`-separated part) followed by the
    # property itself; a property written as `x/y` is expanded through
    # `parse_single_nests`. Nest groups nested in a group's content are removed
    # from that content and handled when the scan reaches them.
    #
    # Raises `FieldsParser::ParseError` for an empty group (`name()`) and for
    # an empty property inside a group.
    def self.parse_nest_groups(fields_text : String) : Nil
      # Groups enclosing the current scan position, outermost first. Every
      # entry remembers where its group closes so it can be dropped later.
      nest_stack = [] of NamedTuple(group_name: String, closing_bracket_index: Int64)
      bracket_pairs = get_bracket_pairs(fields_text, true)

      regex_index = 0

      while regex_result = self.nest_group_pattern.match(fields_text, regex_index)
        raw_match = regex_result[0]
        group_name = regex_result["groupname"]

        text_index = regex_result.begin
        # The next search resumes right after this match (which ends with the
        # opening bracket), so groups nested in this one are found as well.
        regex_index = regex_result.end

        if text_index.nil? || regex_index.nil?
          raise FieldsParser::ParseError.new "Received invalid index while parsing nest groups: text_index: #{text_index} | regex_index: #{regex_index}"
        end

        # The match may begin with the separating comma, which is not part of
        # the group name.
        offset = raw_match.starts_with?(',') ? 1 : 0

        opening_bracket_index = (text_index + group_name.size) + offset
        closing_bracket_index = bracket_pairs[opening_bracket_index]
        content_start = opening_bracket_index + 1

        content = fields_text[content_start...closing_bracket_index]

        if content.empty?
          raise FieldsParser::ParseError.new "Empty nest group at position #{content_start}"
        end

        # Keep only the properties listed directly in this group.
        content = remove_nest_groups(content)

        # Drop the groups that close before the current one does: they cannot
        # be enclosing it.
        while !nest_stack.empty? && closing_bracket_index > nest_stack.last[:closing_bracket_index]
          nest_stack.pop
        end

        group_name.split('/').each do |name|
          nest_stack.push({
            group_name:            name,
            closing_bracket_index: closing_bracket_index,
          })
        end

        next if content.empty?

        content.split(',').each do |prop|
          nest_list = nest_stack.map { |nest_prop| nest_prop[:group_name] }

          if prop.empty?
            raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list << prop}"
          end

          if prop.includes?('/')
            parse_single_nests(prop) { |list| nest_list += list }
          else
            nest_list.push prop
          end

          yield nest_list
        end
      end
    end

    # Returns *text* with every outermost nest group cut out, together with
    # its name and the comma in front of it, so that only the properties
    # written outside of any group remain (`"a,b(c),d"` becomes `"a,d"`).
    def self.remove_nest_groups(text : String) : String
      top_level_pairs = get_bracket_pairs(text, false)

      # Groups are removed starting from the last one so that the indices of
      # the groups still waiting to be removed stay valid.
      top_level_pairs.keys.reverse_each do |group_open|
        group_close = top_level_pairs[group_open]
        # The cut starts at the comma preceding the group name, or at the very
        # beginning when there is none.
        cut_from = text.rindex(',', group_open) || 0

        head = text[0...cut_from]
        tail = text[group_close + 1...text.size]
        text = head + tail
      end

      # Removing a leading group leaves its trailing comma behind.
      text.lchop(',')
    end

    # Maps the index of each opening bracket in *text* to the index of its
    # matching closing bracket.
    #
    # With *recursive* set, every pair is recorded; otherwise only the
    # outermost pairs (those not enclosed in another pair) are.
    #
    # Raises `FieldsParser::ParseError` when a bracket has no counterpart.
    def self.get_bracket_pairs(text : String, recursive = true) : BracketIndex
      open_positions = [] of Int64
      pairs = BracketIndex.new

      text.each_char_with_index do |char, index|
        case char
        when '('
          open_positions.push(index.to_i64)
        when ')'
          opening = open_positions.pop? ||
                    raise FieldsParser::ParseError.new("No matching opening parenthesis at: #{index}")

          # Once the opening bracket is popped, an empty stack means the pair
          # is not enclosed in any other pair.
          pairs[opening] = index.to_i64 if recursive || open_positions.empty?
        end
      end

      unless open_positions.empty?
        raise FieldsParser::ParseError.new "No matching closing parenthesis at: #{open_positions.pop}"
      end

      pairs
    end
  end

  class FieldsGrouper
    alias SkeletonValue = Hash(String, SkeletonValue)

    # Builds a tree of nested hashes out of *fields_text*: every key path
    # yielded by `FieldsParser.parse_fields` becomes a chain of nested keys,
    # and paths sharing a prefix share the corresponding hashes.
    def self.create_json_skeleton(fields_text : String) : SkeletonValue
      skeleton = {} of String => SkeletonValue

      FieldsParser.parse_fields(fields_text) do |key_path|
        node = skeleton
        key_path.each do |key|
          # Descend into the existing child, creating it on first sight.
          node = (node[key] ||= {} of String => SkeletonValue)
        end
      end

      skeleton
    end

    # Flattens *json_skeleton* into a list where each key is followed, when it
    # has children, by the (recursively built) list of its nested keys.
    def self.create_grouped_fields_list(json_skeleton : SkeletonValue) : GroupedFieldsList
      json_skeleton.each_with_object(GroupedFieldsList.new) do |(key, children), grouped|
        grouped << key

        child_fields = create_grouped_fields_list(children)
        grouped << child_fields unless child_fields.empty?
      end
    end
  end

  # Raised by `JSONFilter.filter` when the given value is neither a JSON
  # object nor a JSON array, or when a nested fields list shows up before any
  # key it could apply to.
  class FilterError < Exception
  end

  # Filters *item* using a fields expression such as `items(id,snippet/title)`.
  #
  # The expression is turned into a grouped fields list, which is then handed
  # to the `GroupedFieldsList` overload together with *in_place*.
  def self.filter(item : JSON::Any, fields_text : String, in_place : Bool = true)
    grouped_fields = FieldsGrouper.create_grouped_fields_list(
      FieldsGrouper.create_json_skeleton(fields_text)
    )
    filter(item, grouped_fields, in_place)
  end

  # Filters *item* down to the keys described by *grouped_fields_list* and
  # returns it.
  #
  # In the list, a `String` names a key to keep at the current level and an
  # `Array` is the nested fields list applying to the key that precedes it.
  # For a JSON object only the listed keys are kept; for a JSON array the
  # filter is applied to each element. The work is done on a clone of *item*
  # unless *in_place* is set.
  #
  # Raises `FilterError` when *item* is neither an object nor an array, or
  # when a nested list is encountered before any key.
  def self.filter(item : JSON::Any, grouped_fields_list : GroupedFieldsList, in_place : Bool = true) : JSON::Any
    item = item.clone unless in_place

    unless item.as_h? || item.as_a?
      raise FilterError.new "Can't filter '#{item}' by #{grouped_fields_list}"
    end

    top_level_keys = Array(String).new
    grouped_fields_list.each do |entry|
      case entry
      when String
        top_level_keys.push entry
      when Array
        if top_level_keys.empty?
          raise FilterError.new "Tried to filter while top level keys list is empty"
        end

        # A nested list always belongs to the key collected just before it.
        parent_key = top_level_keys.last

        if item.as_h?
          filter(item[parent_key], entry, in_place: true)
        elsif elements = item.as_a?
          elements.each { |element| filter(element[parent_key], entry, in_place: true) }
        end
      end
    end

    if object = item.as_h?
      object.select! top_level_keys
    elsif elements = item.as_a?
      # Elements are filtered in place, so there is nothing to collect.
      elements.each { |element| filter(element, top_level_keys, in_place: true) }
    end

    item
  end
end