1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
# cs/net/proto/codegen/parser.py
import re
from cs.net.proto.codegen.codegen_types import (
Proto,
Field,
ProtoDB,
Types,
ValidationAttr,
)
def _split_top_level_commas(s: str) -> list[str]:
parts = []
buf = []
depth = 0
for ch in s:
if ch == "(":
depth += 1
elif ch == ")":
depth = max(0, depth - 1)
if ch == "," and depth == 0:
parts.append("".join(buf).strip())
buf = []
else:
buf.append(ch)
if buf:
parts.append("".join(buf).strip())
return [p for p in parts if p]
def _parse_attr(token: str) -> ValidationAttr:
    """Parse one validation attribute token into a ``ValidationAttr``.

    A leading ``::validate::`` or ``validate::`` qualifier is removed (at most
    once). If the remaining text has the form ``name(args)``, the arguments
    are split on top-level commas; an argument that itself contains ``(`` is
    parsed recursively as a nested validator (e.g. ``each(len_lt(32))``),
    and every other argument is kept as a plain string.

    Args:
        token: Attribute text such as ``validate::len_lt(32)`` or
            ``required``.

    Returns:
        A ``ValidationAttr`` whose ``name`` is the bare validator name and
        whose ``args`` holds strings and/or nested ``ValidationAttr`` objects.
    """
    token = token.strip()
    for prefix in ("::validate::", "validate::"):
        if token.startswith(prefix):
            token = token[len(prefix) :]
            break
    name = token
    args: list = []
    if "(" in token and token.endswith(")"):
        name, arg_str = token.split("(", 1)
        # arg_str[:-1] drops the closing ")" that matches the "(" split on.
        for raw in _split_top_level_commas(arg_str[:-1]):
            raw = raw.strip()
            # A double-quoted literal may legitimately contain "(" (for
            # example a regex); it is data, not a nested validator call.
            if "(" in raw and not raw.startswith('"'):
                args.append(_parse_attr(raw))
            else:
                args.append(raw)
    return ValidationAttr(name=name.strip(), args=args)
# First group: identifier following the ``namespace`` keyword.
_NAMESPACE_RE = re.compile(r"namespace\s+([a-zA-Z0-9_:]+)")
# ``DECLARE_PROTO(Name) { body };`` -- the body cannot contain ``}``.
_PROTO_STRUCT_RE = re.compile(r"DECLARE_PROTO\((\w+)\)[\s]*\{([^}]*)\};")
# A ``//`` line comment or a ``/* */`` block comment, whichever starts first.
_COMMENT_RE = re.compile(r"//[^\n]*|/\*.*?\*/", re.DOTALL)
# A ``;`` that is not followed by a ``>`` with no ``<`` or ``>`` in between.
_FIELD_SEP_RE = re.compile(r";(?![^<>]*>)")
# The trailing identifier of a declaration, i.e. the field name.
_FIELD_NAME_RE = re.compile(r"(\w+)\s*$")


def _strip_comments(body: str) -> str:
    """Remove ``//`` and ``/* */`` comments from a struct body."""
    # A block comment becomes one space so the tokens on either side of it
    # are not glued together; a line comment is simply deleted.
    return _COMMENT_RE.sub(
        lambda m: "" if m.group().startswith("//") else " ", body
    )


def _split_field_decls(body: str) -> list[tuple[str, str]]:
    """Return ``(type_text, field_name)`` for each declaration in ``body``."""
    decls: list[tuple[str, str]] = []
    for part in _FIELD_SEP_RE.split(_strip_comments(body)):
        part = part.strip()
        if not part:
            continue
        # The last word is the field name; everything before it is the type
        # (including any leading [[...]] attribute blocks).
        name_match = _FIELD_NAME_RE.search(part)
        if not name_match:
            continue
        type_text = part[: name_match.start()].strip()
        if type_text:  # a lone word has no type, so it is not a field
            decls.append((type_text, name_match.group(1)))
    return decls


def _pop_validation_attrs(type_text: str) -> tuple[list[ValidationAttr], str]:
    """Peel leading ``[[...]]`` blocks off ``type_text``.

    Returns the parsed attributes and the remaining type text.
    """
    attrs: list[ValidationAttr] = []
    rest = type_text.strip()
    while rest.startswith("[["):
        end_idx = rest.find("]]")
        if end_idx == -1:
            # Unterminated block: leave the text as-is rather than guess.
            break
        attrs.extend(
            _parse_attr(tok) for tok in _split_top_level_commas(rest[2:end_idx])
        )
        rest = rest[end_idx + 2 :].strip()
    return attrs, rest


def _normalize_field_type(field_type: str):
    """Map a declared C++ type to the value stored in ``Field.type``."""
    if field_type.endswith("*"):
        # Drop one pointer marker; rstrip so "Foo *" becomes "Foo", not "Foo ".
        return field_type[:-1].rstrip()
    if field_type in ("string", "std::string"):
        return Types.STRING
    # Every other type, including vector/map templates, is kept verbatim.
    return field_type


def ParseInputHeader(input_string: str, filename: str) -> ProtoDB:
    """
    Parses a C++ header file containing DECLARE_PROTO structs
    and returns a ProtoDB mapping struct names to Proto objects.

    The first ``namespace <name>`` occurrence in the text is recorded as the
    namespace of every struct found. Each field keeps its declared type in
    ``original_type``, a normalized type in ``type``, and any leading
    ``[[validate::...]]`` attributes in ``validations``. If two structs share
    a name, the later one replaces the earlier one.

    Args:
        input_string: Full text of the header.
        filename: Name recorded on each Proto and used in error messages.

    Returns:
        A ProtoDB keyed by struct name.

    Raises:
        ValueError: If the text contains no ``namespace`` declaration.
    """
    ns_match = _NAMESPACE_RE.search(input_string)
    if ns_match is None:
        raise ValueError(f"No namespace found in file {filename}")
    namespace = ns_match.group(1)

    protos: ProtoDB = {}
    for struct_name, body in _PROTO_STRUCT_RE.findall(input_string):
        fields = []
        for type_text, field_name in _split_field_decls(body):
            attrs, original_type = _pop_validation_attrs(type_text)
            fields.append(
                Field(
                    name=field_name,
                    type=_normalize_field_type(original_type),
                    original_type=original_type,
                    validations=attrs,
                )
            )
        protos[struct_name] = Proto(
            name=struct_name,
            fields=fields,
            namespace=namespace,
            filename=filename,
        )
    return protos