bugfinder.processing.interproc
- class bugfinder.processing.interproc.InterprocMerger(dataset, deprecation_warning=None)
Bases:
InterprocProcessing
- interproc_cmds_post = ['\n MATCH (s:GenericNode)-[:REACHES]->(d:GenericNode)\n WHERE s<>d AND NOT (d)-[:REACHES]->(:GenericNode)\n SET d:DataSinkNode\n ']
- interproc_cmds_pre = ['\n MATCH (cexpr:GenericNode {type:"CallExpression"})-[\n :IS_AST_PARENT\n ]->(func:GenericNode {type:"Callee"})\n WHERE func.code IN [\n "memcpy","memmove","gets","fgets","fgetws","sprintf",\n "swprintf","strcat","wcscat","strncat","wcsncat","strcpy",\n "wcscpy","strncpy","wcsncpy","wcstombs"\n ]\n WITH DISTINCT cexpr\n MATCH (cexpr)-[:IS_AST_PARENT]->(\n :GenericNode {type:"ArgumentList"}\n )-[:IS_AST_PARENT]->(\n arg:GenericNode {type:"Argument",childNum:"0"}\n )\n MATCH (arg)-[:USE]->(sym:GenericNode {type:"Symbol"})<-[:USE]-(\n expr:DownstreamNode\n )\n WHERE expr.type IN [\n "ExpressionStatement","IdentifierDeclStatement","ForInit",\n "Condition"\n ]\n AND (expr)-[:IS_AST_PARENT*]->(cexpr)\n MERGE (expr)-[r:DEF]->(sym)\n ', '\n MATCH (cexpr:GenericNode {type:"CallExpression"})-[\n :IS_AST_PARENT\n ]->(func:GenericNode {type:"Callee"})\n WHERE func.code IN [\n "scanf","wscanf","fscanf","fwscanf","sscanf","swscanf"\n ]\n WITH DISTINCT cexpr, case when func.code IN [\n "scanf","wscanf"\n ] then 1 else 2 end AS offset\n MATCH (cexpr)-[:IS_AST_PARENT]->(\n :GenericNode {type:"ArgumentList"}\n )-[:IS_AST_PARENT]->(arg:GenericNode {type:"Argument"})\n WHERE arg.childNum > offset\n MATCH (arg)-[:USE]->(sym:GenericNode {type:"Symbol"})<-[\n :USE\n ]-(expr:DownstreamNode)\n WHERE expr.type IN [\n "ExpressionStatement","IdentifierDeclStatement","ForInit",\n "Condition"\n ]\n AND (expr)-[:IS_AST_PARENT*]->(cexpr)\n MERGE (expr)-[r:DEF]->(sym)\n ']
- interproc_cmds_tc = ['\n MATCH (tc:GenericNode {type:"Testcase"})<-[\n :IS_FILE_OF\n ]-(:GenericNode {type:"File"})-[:IS_FILE_OF]->(\n func:GenericNode {type:"Function"}\n )-[:IS_FUNCTION_OF_CFG]->(\n callee:UpstreamNode {type:"CFGEntryNode"}\n ) // Get all function declarations IN the testcase\n WHERE ID(tc)=%d\n WITH tc,func,callee\n MATCH (tc)<-[:IS_FILE_OF]-(:GenericNode {type:"File"})-[\n :IS_FILE_OF\n ]->(:GenericNode {type:"Function"})-[\n :IS_FUNCTION_OF_CFG\n ]->(entry:UpstreamNode {type:"CFGEntryNode"})\n WITH func,callee,entry\n MATCH (entry)-[:CONTROLS*]->(caller:DownstreamNode)\n WHERE caller.type IN ["ExpressionStatement","Condition"]\n WITH func,callee,caller\n MATCH (caller)-[:IS_AST_PARENT*]->(cexpr:GenericNode {\n type:"CallExpression"})\n WHERE NOT (cexpr)<-[:IS_AST_PARENT*]-(\n :GenericNode {type:"CallExpression"}\n ) // Dodge nested function calls\n WITH func,callee,caller,cexpr\n MATCH (cexpr)-[r:IS_AST_PARENT]->(\n :GenericNode {type:"Callee",code:func.code}\n ) // Get all function calls within the testcase\n WITH callee,caller\n MERGE (caller)-[\n intercall:FLOWS_TO\n ]->(callee) // Connect the callee\'s entry point (head) to WHERE it is called\n WITH callee,caller,intercall\n MATCH (callee)-[:CONTROLS*]->(last:DownstreamNode)\n WITH callee,caller,intercall,last\n MATCH (last)-[\n :FLOWS_TO|DOM\n ]->(exit:DownstreamNode {type:"CFGExitNode"}) // Find the callee\'s exit node\n WITH caller,intercall,exit\n MATCH (caller)-[\n nextrel:FLOWS_TO\n ]->(\n next:DownstreamNode\n ) // Find the caller\'s next node IN its control flow graph\n WHERE next.type<>"CFGEntryNode"\n WITH DISTINCT caller,intercall,exit,nextrel,next\n MERGE (exit)-[\n interreturn:FLOWS_TO {callerid:ID(intercall)}\n ]->(next) // Connect the callee\'s tail to the caller\'s next node\n MERGE (caller)-[:SHORTCUT]->(next)\n // Delete the edge between the function call AND its next step,\n // so that the control flow graph now goes through the callee AND\n // returns to the callers 
next step\n DELETE nextrel\n ', '\n MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(\n :GenericNode {type:"File"}\n )-[:IS_FILE_OF]->(\n :GenericNode {type:"Function",code:"main"}\n )-[:IS_FUNCTION_OF_CFG]->(main:UpstreamNode {type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT main\n MATCH (main)-[:FLOWS_TO*]->(\n n1:GenericNode\n )-[:DEF]->(\n sym1:GenericNode {type:"Symbol"}\n ) // Find the initialization of the global variable\n WHERE NOT LEFT(sym1.code,2)="* "\n AND NOT sym1.code contains " . "\n AND NOT sym1.code=TOUPPER(sym1.code)\n AND NOT sym1.code IN ["NULL","L","stdin","& wsaData"]\n AND NOT (sym1)<-[:DEF]-(:GenericNode {type:"IdentifierDeclStatement"})\n AND NOT (sym1)<-[:DEF]-(:GenericNode {type:"Parameter"})\n AND NOT (sym1)<-[:USE]-(:GenericNode)-[\n :IS_AST_PARENT*\n ]->(:GenericNode {type:"Callee",code:sym1.code})\n WITH n1, sym1\n MATCH p=(n1)-[:FLOWS_TO*]->(n2:GenericNode)-[:USE]->(\n sym2:GenericNode {type:"Symbol",code:sym1.code}\n ) // Find the next node n2 that uses the global variable\n WHERE n1<>n2 AND NONE(n IN NODES(p)[1..-1]\n WHERE (n)-[:DEF]->(\n :GenericNode {type:"Symbol",code:sym1.code})\n ) // Ensure the value has NOT been modified since n1\n MERGE (n1)-[:REACHES {var:sym1.code}]->(n2)\n ', '\n MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(\n :GenericNode {type:"File"}\n )-[:IS_FILE_OF]->(\n :GenericNode {type:"Function"}\n )-[:IS_FUNCTION_OF_CFG]->(entry:UpstreamNode {type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT entry\n MATCH (entry)-[:CONTROLS*]->(n:DownstreamNode)\n WITH DISTINCT n\n MATCH (n)-[:DEF|USE]->(sym0:GenericNode {type:"Symbol"})\n WITH DISTINCT sym0\n MATCH (sym0)<-[r0:DEF]-(expr:DownstreamNode)-[r1:USE]->(\n sym1:GenericNode {type:"Symbol"}\n )\n WHERE expr.type IN ["ExpressionStatement","Condition"]\n AND sym0.code="* "+sym1.code\n MERGE (expr)-[r2:DEF]->(sym1)\n ', '\n MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(:GenericNode {\n 
type:"File"})-[:IS_FILE_OF]->(:GenericNode {\n type:"Function"})-[:IS_FUNCTION_OF_CFG]->(entry:UpstreamNode {\n type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT entry\n MATCH (entry)-[:CONTROLS*]->(n:DownstreamNode)\n WITH DISTINCT n\n MATCH (n)-[:IS_AST_PARENT*]->(uop:GenericNode {\n type:"UnaryOperator",code:"&"})\n WITH DISTINCT uop\n MATCH (uop)<-[:IS_AST_PARENT]-(uexpr:GenericNode {\n type:"UnaryOperationExpression"})-[:IS_AST_PARENT]->(idf:GenericNode {\n type:"Identifier"})\n WITH uexpr,idf\n MATCH (uexpr)<-[:IS_AST_PARENT*]-(expr:DownstreamNode)\n WHERE expr.type IN [\n "ExpressionStatement","IdentifierDeclStatement","ForInit","Condition"]\n WITH expr,idf\n MATCH (expr)-[:USE]->(adr_sym:GenericNode {\n type:"Symbol",code:"& "+idf.code})\n WITH expr,adr_sym,idf\n MATCH (expr)<-[:FLOWS_TO*]-(def:DownstreamNode)-[:DEF]->(\n def_sym:GenericNode {type:"Symbol",code:idf.code})\n WHERE expr<>def AND def.type IN ["IdentifierDeclStatement","Parameter"]\n MERGE (def)-[rdef:DEF {var:idf.code}]->(adr_sym)\n MERGE (def)-[dflr:REACHES {var:adr_sym.code}]->(expr)\n WITH DISTINCT expr\n MATCH (ptr_sym:GenericNode {type:"Symbol"})<-[:DEF]-(expr)-[:FLOWS_TO*]-(\n usr:DownstreamNode {type:"ExpressionStatement"})-[:USE]->(\n star_sym:GenericNode {type:"Symbol",code:"* "+ptr_sym.code})\n WHERE expr<>usr\n MERGE (expr)-[sdef:DEF {var:ptr_sym.code}]->(star_sym)\n ', '\n MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(:GenericNode {\n type:"File"})-[:IS_FILE_OF]->(:GenericNode {\n type:"Function"})-[:IS_FUNCTION_OF_CFG]->(entry:UpstreamNode {\n type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT entry\n MATCH (entry)-[:CONTROLS*]->(n:DownstreamNode)\n WITH DISTINCT n\n MATCH (n)-[:DEF|USE]->(sym:GenericNode {type:"Symbol"})\n WITH DISTINCT sym\n MATCH (clr:GenericNode)-[use:USE]->(sym)<-[def:DEF]-(src:DownstreamNode)-[\n cf:FLOWS_TO*]->(clr)\n WHERE clr<>src AND NOT (src)-[:REACHES {var:sym.code}]->(clr)\n MERGE (src)-[ndf:REACHES {var:sym.code}]->(clr)\n ', 
'\n MATCH (tc:GenericNode {type:"Testcase"})<-[\n :IS_FILE_OF\n ]-(:GenericNode {type:"File"})-[\n :IS_FILE_OF\n ]->(:GenericNode {type:"Function"})-[\n :IS_FUNCTION_OF_CFG\n ]->(entry:UpstreamNode {type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT entry\n MATCH (entry)-[:CONTROLS*]->(caller:DownstreamNode)\n WHERE caller.type IN ["ExpressionStatement","Condition"]\n WITH DISTINCT caller\n MATCH (caller)-[:IS_AST_PARENT*]->(cexpr:GenericNode {\n type:"CallExpression"})\n WHERE NOT (cexpr)<-[:IS_AST_PARENT*]-(:GenericNode {type:"CallExpression"})\n WITH caller,cexpr\n MATCH (caller)-[calrel:FLOWS_TO]->(callee:UpstreamNode {\n type:"CFGEntryNode"})\n WITH caller,cexpr,calrel,callee\n MATCH (cexpr)-[:IS_AST_PARENT]->(\n arglst:GenericNode {type:"ArgumentList"}\n )-[:IS_AST_PARENT]->(\n arg:GenericNode {type:"Argument"}\n )-[:USE]->(sym:GenericNode {type:"Symbol"})<-[:USE]-(\n caller\n )<-[df0:REACHES]-(src:DownstreamNode)-[:DEF]->(sym)\n DELETE df0\n WITH caller,calrel,callee,arg,src\n MATCH (callee)-[:FLOWS_TO|CONTROLS]->(\n param:DownstreamNode {type:"Parameter",childNum:arg.childNum}\n )-[:DEF]->(sym:GenericNode {type:"Symbol"})\n WITH caller,calrel,callee,src,param,sym\n MERGE (src)-[:REACHES {var:sym.code}]->(param)\n WITH caller,calrel,callee,param,sym\n MATCH (param)-[rpr:REACHES]->()\n set rpr.src=sym.code\n WITH caller,calrel,callee\n MATCH (callee)-[:CONTROLS*]->(ret:DownstreamNode {type:"ReturnStatement"})\n WHERE callee<>ret\n MERGE (ret)-[:REACHES {callerid:ID(calrel)}]->(caller)\n ', '\n MATCH (tc:GenericNode {type:"Testcase"})<-[\n :IS_FILE_OF\n ]-(:GenericNode {type:"File"})-[\n :IS_FILE_OF\n ]->(:GenericNode {type:"Function",code:"main"})-[\n :IS_FUNCTION_OF_CFG\n ]->(main:UpstreamNode {type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT main\n // Find sources (start of data flow on the control flow path)\n MATCH (main)-[:FLOWS_TO*]->(src:GenericNode)\n WHERE (src)-[:REACHES]->() AND NOT (src)<-[:REACHES]-()\n WITH DISTINCT src\n // Find 
destinations (end of dataflow)\n MATCH pm1=(src)-[r1:REACHES*]->(dst:GenericNode)\n WHERE NOT (dst)-[:REACHES]->()\n AND ALL(idx IN RANGE(1,SIZE(r1)-1)\n // Ensure we follow the same variable\n WHERE r1[idx-1].var IN [r1[idx].var, r1[idx].src])\n WITH DISTINCT src, dst, r1[-1].var AS var, pm1\n ORDER BY LENGTH(pm1) // Order paths by length\n // Group path by source, destination AND variable\n WITH DISTINCT src, dst, var, COLLECT(pm1) AS paths\n UNWIND RANGE(0, SIZE(paths)-2) AS idx\n WITH src, dst, paths[idx] AS shorter, paths[idx+1] AS longer\n // Check if the shorter path is a subset of the longer path\n WHERE ALL(n IN NODES(shorter) WHERE n IN NODES(longer))\n // Retrieve extraneous relationship / shortcuts IN the shorter path\n WITH src, dst, FILTER(\n r IN RELATIONSHIPS(shorter) WHERE NOT r IN RELATIONSHIPS(longer)\n ) AS xr\n // Delete the shortcuts\n FOREACH(r IN xr | DELETE r)\n ', '\n MATCH (tc:GenericNode {type:"Testcase"})<-[\n :IS_FILE_OF\n ]-(:GenericNode {type:"File"})-[\n :IS_FILE_OF\n ]->(:GenericNode {type:"Function",code:"main"})-[\n :IS_FUNCTION_OF_CFG\n ]->(main:UpstreamNode {type:"CFGEntryNode"})\n WHERE ID(tc)=%d\n WITH DISTINCT main\n MATCH (main)-[:FLOWS_TO*]->(:GenericNode)-[r1:REACHES]->(n:GenericNode)-[\n r2:REACHES]->()\n WHERE EXISTS(r1.size) AND NOT EXISTS(r2.size)\n AND r1.var IN [r2.var, r2.src]\n SET r2.size=r1.size\n WITH DISTINCT n, r1, r2\n MATCH p=(n)-[r2]->(:GenericNode)-[:REACHES*]->(:GenericNode)\n WHERE ALL(idx IN RANGE(1, SIZE(RELATIONSHIPS(p))-1)\n WHERE NOT EXISTS(RELATIONSHIPS(p)[idx].size)\n AND RELATIONSHIPS(p)[idx-1].var IN [RELATIONSHIPS(p)[idx].var,\n RELATIONSHIPS(p)[idx].src])\n UNWIND RELATIONSHIPS(p) AS r3\n SET r3.size=r1.size\n ']
- class bugfinder.processing.interproc.InterprocProcessing(dataset, deprecation_warning=None)
Bases:
Neo4J3Processing
- assign_ports()
Randomly assign ports on the machine.
- configure_command(command)
Configure the command to be sent to the container. Needs to be implemented by the subclass.
- Parameters
command – The command to configure before it is sent to the container.
- configure_container()
Setup container variables.
- configure_container_with_dict(container_config)
Configure the given container manually. Needs to be implemented by the subclass.
- Parameters
container_config – The configuration to apply to the given container.
- interproc_cmds_post = []
- interproc_cmds_pre = []
- interproc_cmds_tc = []
- log_input = None
- log_output = None
- send_commands()
Send commands to the container.
- bugfinder.processing.interproc.interproc_worker(progress_bar, cmds, tcid, q, port)