blob: a444f714c3700d6f614e8963ca691f094b7f6e46 [file] [log] [blame]
Zsolt Haraszti46c72002016-10-10 09:55:30 -07001#
Zsolt Harasztiaccad4a2017-01-03 21:56:48 -08002# Copyright 2017 the original author or authors.
Zsolt Haraszti46c72002016-10-10 09:55:30 -07003#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16import re
17from collections import OrderedDict
18from copy import copy
19
20from google.protobuf.descriptor import FieldDescriptor
21
# Matches a '/{...}' path parameter and captures the text between the braces.
# Both '{' and '}' are excluded from the capture so that a stray closing brace
# later in the path can never be swallowed into the parameter name.
re_path_param = re.compile(r'/{([^{}]+)}')

# Splits a path template into segments. Each match is either a literal
# '/segment' (group "absolute", without the slash) or a '{...}' placeholder
# (group "symbolic", including its braces).
re_segment = re.compile(r'/(?P<absolute>[^{}/]+)|(?P<symbolic>{[^}]+})')
24
25
class DuplicateMethodAndPathError(Exception):
    """Raised when two HTTP bindings map to the same verb on the same path."""
28
29
class ProtobufCompilationFailedError(Exception):
    """Signals a failed protobuf compilation.

    NOTE(review): not raised anywhere in the visible part of this module;
    presumably used by callers -- confirm before removing.
    """
32
33
class InvalidPathArgumentError(Exception):
    """Raised when a path parameter names a field the input type lacks."""
Zsolt Haraszti46c72002016-10-10 09:55:30 -070036
37
def native_descriptors_to_swagger(native_descriptors):
    """
    Generate a swagger data dict from the native descriptors extracted
    from protobuf file(s).
    :param native_descriptors:
        Sequence of descriptor dicts as extracted from proto file
        descriptors; the last entry is treated as the top-most (root) file.
        See DescriptorParser and its parse_file_descriptors() method.
    :return: dict ready to be serialized to JSON as swagger.json file.
    :raises AssertionError: if a message type and an enum type share the
        same fully qualified name.
    """

    # gather all top-level and nested message type definitions and build map
    message_types_dict = gather_all_message_types(native_descriptors)

    # create similar map for all top-level and nested enum definitions
    enum_types_dict = gather_all_enum_types(native_descriptors)

    # make sure none clashes (iterating a dict yields its keys, which works
    # on both Python 2 and Python 3, unlike dict.iterkeys())
    clashing_names = set(message_types_dict).intersection(enum_types_dict)
    assert not clashing_names, (
        'Type names used by both a message and an enum: {}'.format(
            sorted(clashing_names)))

    # single lookup table covering messages and enums
    all_types = dict(message_types_dict)
    all_types.update(enum_types_dict)

    # gather all method definitions and collect all referenced input/output
    # types
    types_referenced, methods_dict = gather_all_methods(native_descriptors)

    # process all directly and indirectly referenced types into JSON schema
    # type definitions
    definitions = generate_definitions(types_referenced, all_types)

    # process all methods and generate the swagger path entries
    paths = generate_paths(methods_dict, definitions)

    # static part
    # last descriptor is assumed to be the top-most one
    root_descriptor = native_descriptors[-1]
    swagger = {
        'swagger': "2.0",
        'info': {
            'title': root_descriptor['name'],
            'version': "version not set"
        },
        'schemes': ["http", "https"],
        'consumes': ["application/json"],
        'produces': ["application/json"],
        'paths': paths,
        'definitions': definitions
    }

    return swagger
91
92
def gather_all_message_types(descriptors):
    """Return a dict mapping each fully qualified message name to its
    descriptor, covering top-level and nested message types.
    """
    by_full_name = {}
    for full_name, message_type in iterate_message_types(descriptors):
        by_full_name[full_name] = message_type
    return by_full_name
99
100
def gather_all_enum_types(descriptors):
    """Return a dict mapping each fully qualified enum name to its
    descriptor, as produced by iterate_enum_types().
    """
    by_full_name = {}
    for full_name, enum_type in iterate_enum_types(descriptors):
        by_full_name[full_name] = enum_type
    return by_full_name
107
108
def gather_all_methods(descriptors):
    """Collect every method yielded by iterate_methods().

    :return: tuple (types_referenced, methods) where types_referenced is the
        set of input/output type names with surrounding dots stripped, and
        methods is an OrderedDict of full method name -> (service, method).
    """
    methods = OrderedDict()
    types_referenced = set()
    for full_name, service, method in iterate_methods(descriptors):
        methods[full_name] = (service, method)
        for key in ('input_type', 'output_type'):
            types_referenced.add(method[key].strip('.'))
    return types_referenced, methods
117
118
def iterate_methods(descriptors):
    """Yield (full_name, service, method) for every method that carries an
    'http' option.

    full_name is '<package>.<service name>.<method name>'. Methods whose
    options are missing, empty, or lack an 'http' entry are skipped.

    :param descriptors: iterable of file descriptor dicts; each must have a
        'package' key and may have a 'service' list.
    """
    for descriptor in descriptors:
        package = descriptor['package']
        for service in descriptor.get('service', []):
            service_prefix = package + '.' + service['name']
            for method in service.get('method', []):
                # skip methods that do not have http options; a method
                # without any options at all is skipped, not an error
                options = method.get('options') or {}
                if 'http' not in options:
                    continue
                full_name = service_prefix + '.' + method['name']
                yield full_name, service, method
130
131
def iterate_for_type_in(message_types, prefix):
    """Yield (full_name, message_type) for each message type and, depth
    first, for everything under its 'nested_type' list.

    A parent is yielded before its nested types, and its nested types before
    its next sibling. Names are built as '<prefix>.<name>'.
    """
    # Each stack frame is (name scope, iterator over the siblings in that
    # scope); the top frame is always the one currently being walked.
    stack = [(prefix, iter(message_types))]
    while stack:
        scope, siblings = stack[-1]
        try:
            current = next(siblings)
        except StopIteration:
            stack.pop()
            continue
        qualified = scope + '.' + current['name']
        yield qualified, current
        stack.append((qualified, iter(current.get('nested_type', []))))
139
140
def iterate_message_types(descriptors):
    """Yield (full_name, message_type) for every top-level and nested
    message type in all descriptors, names prefixed with the file's package.
    """
    for file_descriptor in descriptors:
        package = file_descriptor['package']
        top_level = file_descriptor.get('message_type', [])
        for named_type in iterate_for_type_in(top_level, package):
            yield named_type
147
148
def iterate_enum_types(descriptors):
    """Yield (full_name, enum) for every enum in all descriptors.

    For each file, the file-level enums come first (named
    '<package>.<enum>'), followed by the enums declared inside any
    top-level or nested message (named '<message full name>.<enum>').
    """
    for file_descriptor in descriptors:
        package = file_descriptor['package']

        # enums declared directly in the file
        for enum in file_descriptor.get('enum_type', []):
            yield package + '.' + enum['name'], enum

        # enums declared inside messages, at any nesting depth
        messages = iterate_for_type_in(
            file_descriptor.get('message_type', []), package)
        for owner_name, owner in messages:
            for enum in owner.get('enum_type', []):
                yield owner_name + '.' + enum['name'], enum
160
161
def generate_definitions(types_referenced, types):
    """Build JSON schema definitions for the referenced types and for every
    type they refer to in turn.

    :param types_referenced: set of fully qualified type names to start from
        (left unmodified; a copy is used as the work set).
    :param types: dict of full type name -> descriptor for all known types.
    :return: dict of full type name -> definition.
    """
    definitions = {}
    pending = copy(types_referenced)
    while pending:
        name = pending.pop()
        definition, dependencies = make_definition(types[name], types)
        definitions[name] = definition
        # queue only the types that have not been defined yet
        pending.update(
            dep for dep in dependencies if dep not in definitions)
    return definitions
178
179
def make_definition(type, types):
    """Return (definition, referenced type names) for an enum or a message
    descriptor; enums never reference other types.
    """
    is_enum = type['_type'] == 'google.protobuf.EnumDescriptorProto'
    if not is_enum:
        return make_object_definition(type, types)
    return make_enum_definition(type), set()
185
186
def make_enum_definition(type):
    """Return the string-enum schema definition for an enum descriptor.

    The first value is used as the default. The description is the enum's
    '_description' (or its name when that is empty) followed by one line per
    value, each with the value's own '_description' when it has one.
    """
    values = type['value']
    names = [value['name'] for value in values]
    first_name = values[0]['name']

    heading = type.get('_description', '') or type['name']
    value_lines = []
    for value in values:
        line = ' - {}'.format(value['name'])
        note = value.get('_description', '')
        if note:
            line += ': {}'.format(note)
        value_lines.append(line)

    return {
        'type': 'string',
        'enum': names,
        'default': first_name,
        'description': heading + '\nValid values:\n' + '\n'.join(value_lines)
    }
212
213
def make_object_definition(type, types):
    """Return (definition, referenced type names) for a message descriptor.

    The definition is an 'object' schema; 'properties' is included only when
    the message has fields, and 'description' only when the descriptor has a
    '_description' key.
    """
    referenced = set()
    properties = {}
    for field in type.get('field', []):
        name, schema, field_refs = make_property(field, types)
        properties[name] = schema
        referenced.update(field_refs)

    definition = {'type': 'object'}
    if properties:
        definition['properties'] = properties
    if '_description' in type:
        definition['description'] = type['_description']

    return definition, referenced
234
235
def make_property(field, types):
    """Translate one field descriptor into a JSON schema property.

    :param field: field descriptor dict ('name', 'label', 'type' and, for
        message/enum fields, 'type_name'; optionally '_description').
    :param types: dict of full type name -> descriptor for all known types.
    :return: tuple (field name, property schema, set of type names the
        schema refers to via '$ref').
    :raises NotImplementedError: for group fields.
    """
    referenced = set()

    repeated = field['label'] == FieldDescriptor.LABEL_REPEATED
    field_type = field['type']

    if field_type == FieldDescriptor.TYPE_MESSAGE:

        type_name = field['type_name'].strip('.')
        message_type = types[type_name]

        if message_type.get('options', {}).get('map_entry', False):
            # map-entries are inlined: the schema is an object whose
            # additionalProperties follow the entry's second field (the map
            # value, per protobuf's map-entry convention).
            _, value_schema, value_refs = make_property(
                message_type['field'][1], types)
            # Keep the types the map value refers to; dropping them would
            # leave a '$ref' whose definition is never generated.
            referenced.update(value_refs)
            repeated = False
            schema = {
                'type': 'object',
                'additionalProperties': value_schema
            }

        elif type_name == 'google.protobuf.Timestamp':
            # time-stamp is mapped back to JSON schema date-time string
            schema = {
                'type': 'string',
                'format': 'date-time'
            }

        else:
            # normal nested object field
            schema = {
                '$ref': '#/definitions/{}'.format(type_name)
            }
            referenced.add(type_name)

    elif field_type == FieldDescriptor.TYPE_ENUM:
        type_name = field['type_name'].strip('.')
        schema = {
            '$ref': '#/definitions/{}'.format(type_name)
        }
        referenced.add(type_name)

    elif field_type == FieldDescriptor.TYPE_GROUP:
        raise NotImplementedError()

    else:
        # scalar types come straight from the lookup table
        json_type, json_format = TYPE_MAP[field_type]
        schema = {
            'type': json_type,
            'format': json_format
        }

    if repeated:
        schema = {
            'type': 'array',
            'items': schema
        }

    # the description belongs to the outermost schema (the array, if any)
    if '_description' in field:
        schema['description'] = field['_description']

    return field['name'], schema, referenced
302
303
def generate_paths(methods_dict, definitions):
    """Build the swagger 'paths' dict from the collected methods.

    Each method's 'http' option and each of its 'additional_bindings'
    produces one entry keyed by pruned path and then by verb.

    :param methods_dict: mapping of full method name -> (service, method),
        where method['options']['http'] holds the HTTP binding.
    :param definitions: dict of full type name -> JSON schema definition,
        used to resolve the type of each path parameter.
    :return: dict of path -> {verb -> operation entry}.
    :raises DuplicateMethodAndPathError: if two bindings resolve to the same
        verb and path.
    :raises InvalidPathArgumentError: if a path parameter names a field that
        the input type does not have.
    :raises AssertionError: if a binding has no verb, has both a standard
        verb and a 'custom' one, or a nested path parameter goes through a
        non-object field.
    """

    paths = {}

    def _iterate():
        """Yield (service, method, binding) for primary and extra bindings."""
        # .values() works on both Python 2 and 3 (iteritems() is 2-only)
        for service, method in methods_dict.values():
            http_option = method['options']['http']
            yield service, method, http_option
            for binding in http_option.get('additional_bindings', []):
                yield service, method, binding

    def prune_path(path):
        """rid '=<stuff>' pattern from path symbolic segments"""
        pruned_segments = []
        for absolute, symbolic in re_segment.findall(path):
            if symbolic:
                full_symbol = symbolic[1:-1]  # drop the braces
                pruned_symbol = full_symbol.split('=', 1)[0]
                pruned_segments.append('{' + pruned_symbol + '}')
            else:
                pruned_segments.append(absolute)

        return '/' + '/'.join(pruned_segments)

    def lookup_input_type(input_type_name):
        return definitions[input_type_name.strip('.')]

    def lookup_type(input_type, field_name):
        """Resolve a (possibly dotted) field name to its (type, format)."""
        local_field_name, _, rest = field_name.partition('.')
        # a message without fields has no 'properties' key at all; report
        # that as an invalid path argument rather than a bare KeyError
        properties = input_type.get('properties', {})
        if local_field_name not in properties:
            raise InvalidPathArgumentError(
                'Input type has no field {}'.format(field_name))
        field = properties[local_field_name]
        if rest:
            field_type = field.get('type', 'object')
            assert field_type == 'object', (
                'Nested field name "%s" refers to field that of type "%s" '
                '(.%s should be nested object field)'
                % (field_name, field_type, local_field_name))
            ref = field['$ref']
            assert ref.startswith('#/definitions/')
            type_name = ref.replace('#/definitions/', '')
            nested_input_type = lookup_input_type(type_name)
            return lookup_type(nested_input_type, rest)
        # NOTE(review): fields without both 'type' and 'format' (e.g. '$ref'
        # or array properties) raise KeyError here -- confirm whether such
        # fields should be accepted as path parameters.
        return field['type'], field['format']

    def make_entry(service, method, http):
        """Return (path, verb, operation entry) for one HTTP binding."""
        parameters = []
        verb = None
        for verb_candidate in ('get', 'delete', 'patch', 'post', 'put'):
            if verb_candidate in http:
                verb, path = verb_candidate, http[verb_candidate]
                break
        if 'custom' in http:
            assert verb is None
            verb = http['custom']['kind']
            path = http['custom']['path']
        assert verb is not None
        path = prune_path(path)

        # for each symbolic segment in path, add a path parameter entry
        input_type = lookup_input_type(method['input_type'])
        for segment in re_path_param.findall(path):
            symbol = segment.split('=')[0]
            param_type, param_format = lookup_type(input_type, symbol)
            parameters.append({
                'in': 'path',
                'name': symbol,
                'required': True,
                'type': param_type,
                'format': param_format
            })

        if 'body' in http:  # TODO validate if body lists fields
            parameters.append({
                'in': 'body',
                'name': 'body',
                'required': True,
                'schema': {'$ref': '#/definitions/{}'.format(
                    method['input_type'].strip('.'))}
            })

        entry = {
            'operationId': method['name'],
            'tags': [service['name'], ],
            'responses': {
                '200': {  # TODO: code is 201 and 209 in POST/DELETE?
                    # u"" is a unicode string on Python 2 and a plain str on
                    # Python 3, replacing the 2-only unicode("") call
                    'description': u"",  # TODO: ever filled by proto?
                    'schema': {
                        '$ref': '#/definitions/{}'.format(
                            method['output_type'].strip('.'))
                    }
                },
                # TODO shall we prefill with standard error (verb specific),
                # such as 400, 403, 404, 409, 509, 500, 503 etc.
            }
        }

        if parameters:
            entry['parameters'] = parameters

        summary, description = extract_summary_and_description(method)
        if summary:
            entry['summary'] = summary
        if description:
            entry['description'] = description

        return path, verb, entry

    for service, method, http in _iterate():
        path, verb, entry = make_entry(service, method, http)
        path_dict = paths.setdefault(path, {})
        if verb in path_dict:
            raise DuplicateMethodAndPathError(
                'There is already a {} method defined for path ({})'.format(
                    verb, path))
        path_dict[verb] = entry

    return paths
427
428
def extract_summary_and_description(obj):
    """
    Break raw _description field (if present) into a summary line and/or
    detailed description text as follows:
    * if the text is a single line, or its second line is blank and nothing
      but white-space follows, then the first line is the summary and there
      is no detailed description.
    * if the text starts with a line followed by a blank line followed by
      more non-blank text, then the first line is the summary and everything
      after the blank line is the description.
    * in all other cases (the second line is not blank) the whole text is
      the description and no summary is generated.
    :return: tuple (summary, description); either item may be None.
    """
    assert isinstance(obj, dict)

    text = obj.get('_description', '')
    if not text:
        return None, None

    # Peel off the first two lines; missing parts come back as ''.
    first_line, _, after_first = text.partition('\n')
    second_line, _, remainder = after_first.partition('\n')

    if second_line.strip():
        # no blank separator line: everything is description
        return None, text

    return first_line, (remainder if remainder.strip() else None)
454
455
# Scalar protobuf field type -> (JSON schema type, format) pair.
# Message and group fields are deliberately absent: they are not looked up
# in this table.
TYPE_MAP = {
    # boolean, text and binary
    FieldDescriptor.TYPE_BOOL: ('boolean', 'boolean'),
    FieldDescriptor.TYPE_STRING: ('string', 'string'),
    FieldDescriptor.TYPE_BYTES: ('string', 'byte'),
    FieldDescriptor.TYPE_ENUM: ('string', 'string'),

    # floating point
    FieldDescriptor.TYPE_FLOAT: ('number', 'float'),
    FieldDescriptor.TYPE_DOUBLE: ('number', 'double'),

    # 32-bit integers are JSON integers
    FieldDescriptor.TYPE_INT32: ('integer', 'int32'),
    FieldDescriptor.TYPE_SINT32: ('integer', 'int32'),
    FieldDescriptor.TYPE_SFIXED32: ('integer', 'int32'),
    FieldDescriptor.TYPE_UINT32: ('integer', 'int64'),
    FieldDescriptor.TYPE_FIXED32: ('integer', 'int64'),

    # 64-bit integers are represented as JSON strings
    FieldDescriptor.TYPE_INT64: ('string', 'int64'),
    FieldDescriptor.TYPE_SINT64: ('string', 'int64'),
    FieldDescriptor.TYPE_SFIXED64: ('string', 'int64'),
    FieldDescriptor.TYPE_UINT64: ('string', 'uint64'),
    FieldDescriptor.TYPE_FIXED64: ('string', 'uint64'),
}