import logging from collections import defaultdict from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from .clang.cindex import * logger = logging.getLogger(__file__) logging.basicConfig(level=logging.INFO) @dataclass class Variable: name: str type: str parent: Any = None constant: bool = False static: bool = False default: Any = None @dataclass class LiteralVariable(Variable): literal: str = None literal_valid: bool = True @dataclass class Enum: name: str type: str parent: "Namespace" = None values: Dict[str, Variable] = field(default_factory=dict) is_strong_typed: bool = False @property def full_name(self): if self.parent is None: # return "::" + self.name return self.name return self.parent.full_name + "::" + self.name def full_name_of(self, v: Variable): return self.full_name + "::" + v.name @dataclass class Function: name: str ret_type: str parent: "Namespace" = None args: List[Variable] = field(default_factory=list) calling_convention: str = "__cdecl" @property def type(self, show_calling_convention: bool = False): args = ",".join([i.type for i in self.args]) calling = self.calling_convention + ' ' if show_calling_convention else '' return f"{self.ret_type}({calling} *)({args})" @property def full_name(self): return f"::{self.name}" @property def full_signature(self): s = f"{self.name} (" for arg in self.args: s += arg.type + ' ' + arg.name + ',' s = s[:-2] + ')' return s def __str__(self): return self.full_signature @dataclass class Namespace: name: str = '' parent: "Namespace" = None enums: Dict[str, Enum] = field(default_factory=dict) typedefs: Dict[str, str] = field(default_factory=dict) classes: Dict[str, 'Class'] = field(default_factory=dict) variables: Dict[str, Variable] = field(default_factory=dict) functions: Dict[str, List[Function]] = field(default_factory=(lambda: defaultdict(list))) @property def full_name(self): if self.parent is None: return self.name return self.parent.full_name + "::" + self.name @dataclass class Class(Namespace): functions: Dict[str, List['Method']] = field(default_factory=(lambda: defaultdict(list))) constructors: List['Method'] = field(default_factory=list) destructor: 'Method' = None is_polymorphic: bool = False def __str__(self): return "class " + self.name # without this, PyCharm will crash def __repr__(self): return "class" + self.name @dataclass class Method(Function): name: str ret_type: str parent: Class = None access: str = 'public' is_virtual: bool = False is_pure_virtual: bool = False is_static: bool = False is_final: bool = False @property def type(self, show_calling_convention: bool = False): args = ",".join([i.type for i in self.args]) calling = self.calling_convention + ' ' if show_calling_convention else '' parent_prefix = "" if not self.is_static: parent_prefix = f"{self.parent.full_name}::" return f"{self.ret_type}({calling}{parent_prefix}*)({args})" @property def full_name(self): return f"{self.parent.name}::{self.name}" @property def full_signature(self): return "{} {}{} {}::" \ .format(self.access, 'virtual' if self.is_virtual else '', 'static' if self.is_static else '', self.parent.name) \ + super().full_signature \ + (' = 0' if self.is_pure_virtual else '') def __str__(self): return self.full_signature @dataclass() class CXXParseResult(Namespace): macros: Dict[str, str] = field(default_factory=dict) class CXXFileParser: def __init__(self, file_path: Optional[str], unsaved_files=None): self.unsaved_files = unsaved_files self.file_path = file_path def parse(self)->CXXParseResult: idx = Index.create() rs = idx.parse(self.file_path, args="-std=c++11 ".split(' '), unsaved_files=self.unsaved_files, options=(TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD | TranslationUnit.PARSE_SKIP_FUNCTION_BODIES | TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION ) ) result = CXXParseResult() # todo: parse namespace for c in rs.cursor.walk_preorder(): if c.kind == CursorKind.FUNCTION_DECL: func = CXXFileParser._process_function(c) result.functions[func.name] = func elif c.kind == CursorKind.ENUM_DECL: e = CXXFileParser._process_enum(c) result.enums[e.name] = e elif c.kind == CursorKind.CLASS_DECL or c.kind == CursorKind.STRUCT_DECL: class_ = CXXFileParser._process_class(c) cname = class_.name result.classes[cname] = class_ elif c.kind == CursorKind.VAR_DECL: name, value = CXXFileParser._process_variable(c) if value: result.variables[name] = value elif c.kind == CursorKind.TYPEDEF_DECL: name, target = CXXFileParser._process_typedef(c) result.typedefs[name] = target elif c.kind == CursorKind.MACRO_DEFINITION: name, definition = CXXFileParser._process_macro_definition(c) result.macros[name] = definition elif False \ or c.kind == CursorKind.ENUM_CONSTANT_DECL \ or c.kind == CursorKind.CXX_METHOD \ or c.kind == CursorKind.CXX_FINAL_ATTR \ or c.kind == CursorKind.DESTRUCTOR \ or c.kind == CursorKind.PARM_DECL \ or c.kind == CursorKind.CXX_ACCESS_SPEC_DECL \ or c.kind == CursorKind.FIELD_DECL: pass elif c.kind == CursorKind.COMPOUND_STMT: # ignore any body pass elif CXXFileParser._is_literal_cursor(c) \ or c.kind == CursorKind.MACRO_INSTANTIATION \ or c.kind == CursorKind.INCLUSION_DIRECTIVE: # just not need to process pass elif c.kind == CursorKind.TYPE_REF \ or c.kind == CursorKind.UNEXPOSED_EXPR \ or c.kind == CursorKind.TRANSLATION_UNIT: # i don't know what those are pass else: logging.warning("unrecognized cursor kind: %s, %s, %s", c.kind, c.spelling, c.extent) return result @staticmethod def _process_function(c: Cursor): func = Function(name=c.displayname, ret_type=c.result_type.spelling, args=[ Variable(name=ac.displayname, type=ac.type.spelling) for ac in c.get_children() ], ) return func @staticmethod def _process_method(c: Cursor, class_): func = Method( parent=class_, name=c.spelling, ret_type=c.result_type.spelling, access=c.access_specifier.name.lower(), is_virtual=c.is_virtual_method(), is_pure_virtual=c.is_pure_virtual_method(), is_static=c.is_static_method(), ) for ac in c.get_children(): if ac.kind == CursorKind.PARM_DECL: arg = Variable(ac.displayname, ac.type.spelling) func.args.append(arg) elif ac.kind == CursorKind.CXX_FINAL_ATTR: func.is_final = True elif ac.kind == CursorKind.COMPOUND_STMT: # we don't care about the function body pass elif ac.kind == CursorKind.TYPE_REF: # I don't what this is, maybe a mistake of clang? pass else: logger.warning("unknown kind in cxx_method child: %s %s", ac.kind, ac.extent) return func @staticmethod def _process_class(c: Cursor): # noinspection PyArgumentList class_ = Class(name=c.displayname) for ac in c.get_children(): if ac.kind == CursorKind.CONSTRUCTOR: func = CXXFileParser._process_method(ac, class_) if func.is_virtual: class_.is_polymorphic = True class_.constructors = func elif ac.kind == CursorKind.DESTRUCTOR: func = CXXFileParser._process_method(ac, class_) if func.is_virtual: class_.is_polymorphic = True class_.destructor = func elif ac.kind == CursorKind.FIELD_DECL: v = Variable(ac.spelling, ac.type.spelling) class_.variables[v.name] = v elif ac.kind == CursorKind.CXX_METHOD: func = CXXFileParser._process_method(ac, class_) if func.is_virtual: class_.is_polymorphic = True class_.functions[func.name].append(func) elif ac.kind == CursorKind.CXX_ACCESS_SPEC_DECL: pass else: logger.warning("unknown kind in class child, and not handled: %s %s", ac.kind, ac.extent) return class_ @staticmethod def _process_enum(c: Cursor): e = Enum(name=c.spelling, type=c.enum_type.spelling ) for i in list(c.get_children()): e.values[i.spelling] = Variable(name=i.spelling, type=e.name, default=i.enum_value, ) return e @staticmethod def _process_variable(c: Cursor): children = list(c.get_children()) length = len(children) if length == 1: child = children[0] if CXXFileParser._is_literal_cursor(child): value = CXXFileParser._process_literal(child) return c.spelling, value logger.warning("unable to process variable : %s %s", c.spelling, c.extent) return c.spelling, None @staticmethod def _process_typedef(c: Cursor): return c.spelling, c.underlying_typedef_type.spelling @staticmethod def _process_macro_definition(c: Cursor): name = c.spelling tokens = list(c.get_tokens()) length = len(tokens) if length == 1: return name, '' return name, ' '.join([i.spelling for i in tokens[1:]]) @staticmethod def _get_source_from_file(file, start, end, encoding='utf-8'): with open(file, 'rb') as f: f.seek(start) return f.read(end - start).decode(encoding=encoding) @staticmethod def _get_source(token: Token, encoding='utf-8'): return CXXFileParser._get_source_from_file(token.location.file.name, token.extent.start.offset, token.extent.end.offset, encoding) LITERAL_KINDS = { CursorKind.INTEGER_LITERAL, CursorKind.STRING_LITERAL, CursorKind.CHARACTER_LITERAL, CursorKind.CXX_NULL_PTR_LITERAL_EXPR, CursorKind.FLOATING_LITERAL, CursorKind.IMAGINARY_LITERAL, CursorKind.CXX_BOOL_LITERAL_EXPR, # CursorKind.OBJC_STRING_LITERAL, # CursorKind.OBJ_BOOL_LITERAL_EXPR, # CursorKind.COMPOUND_LITERAL_EXPR, } @staticmethod def _is_literal_cursor(c: Cursor): return c.kind in CXXFileParser.LITERAL_KINDS # return str(c)[-9:-1] == 'LITERAL' @staticmethod def _process_literal(c): tokens = list(c.get_tokens()) if len(tokens) == 1: spelling = tokens[0].spelling if c.kind == CursorKind.INTEGER_LITERAL: return int(spelling) elif c.kind == CursorKind.STRING_LITERAL: return str(spelling) elif c.kind == CursorKind.CHARACTER_LITERAL: return CXXFileParser.character_literal_to_int(spelling) elif c.kind == CursorKind.FLOATING_LITERAL: return float(spelling) logger.warning("unknown literal : %s, %s %s", c.kind, c.spelling, c.extent) return None @staticmethod def character_literal_to_int(string): s = 0 for i in string.encode(): s = s * 255 + i return s pass class CXXParser(CXXFileParser): def __init__(self, files: List[str]): dummy_code = "" for file in files: dummy_code += f'#include "{file}"\n' dummy_name = "dummy.cpp" super().__init__(dummy_name, unsaved_files=[ [dummy_name, dummy_code] ] ) Config.set_library_path("")