bugfinder.processing.interproc

class bugfinder.processing.interproc.InterprocMerger(dataset, deprecation_warning=None)

Bases: InterprocProcessing

interproc_cmds_post = ['\n            MATCH (s:GenericNode)-[:REACHES]->(d:GenericNode)\n            WHERE s<>d AND NOT (d)-[:REACHES]->(:GenericNode)\n            SET d:DataSinkNode\n        ']
interproc_cmds_pre = ['\n            MATCH (cexpr:GenericNode {type:"CallExpression"})-[\n                :IS_AST_PARENT\n            ]->(func:GenericNode {type:"Callee"})\n            WHERE func.code IN [\n                "memcpy","memmove","gets","fgets","fgetws","sprintf",\n                "swprintf","strcat","wcscat","strncat","wcsncat","strcpy",\n                "wcscpy","strncpy","wcsncpy","wcstombs"\n            ]\n            WITH DISTINCT cexpr\n            MATCH (cexpr)-[:IS_AST_PARENT]->(\n                :GenericNode {type:"ArgumentList"}\n            )-[:IS_AST_PARENT]->(\n                arg:GenericNode {type:"Argument",childNum:"0"}\n            )\n            MATCH (arg)-[:USE]->(sym:GenericNode {type:"Symbol"})<-[:USE]-(\n                expr:DownstreamNode\n            )\n            WHERE expr.type IN [\n                "ExpressionStatement","IdentifierDeclStatement","ForInit",\n                "Condition"\n            ]\n            AND (expr)-[:IS_AST_PARENT*]->(cexpr)\n            MERGE (expr)-[r:DEF]->(sym)\n        ', '\n            MATCH (cexpr:GenericNode {type:"CallExpression"})-[\n                :IS_AST_PARENT\n            ]->(func:GenericNode {type:"Callee"})\n            WHERE func.code IN [\n                "scanf","wscanf","fscanf","fwscanf","sscanf","swscanf"\n            ]\n            WITH DISTINCT cexpr, case when func.code IN [\n                "scanf","wscanf"\n            ] then 1 else 2 end AS offset\n            MATCH (cexpr)-[:IS_AST_PARENT]->(\n                :GenericNode {type:"ArgumentList"}\n            )-[:IS_AST_PARENT]->(arg:GenericNode {type:"Argument"})\n            WHERE arg.childNum > offset\n            MATCH (arg)-[:USE]->(sym:GenericNode {type:"Symbol"})<-[\n                :USE\n            ]-(expr:DownstreamNode)\n            WHERE expr.type IN [\n                "ExpressionStatement","IdentifierDeclStatement","ForInit",\n                "Condition"\n            ]\n            AND 
(expr)-[:IS_AST_PARENT*]->(cexpr)\n            MERGE (expr)-[r:DEF]->(sym)\n        ']
interproc_cmds_tc = ['\n            MATCH (tc:GenericNode {type:"Testcase"})<-[\n                :IS_FILE_OF\n            ]-(:GenericNode {type:"File"})-[:IS_FILE_OF]->(\n                func:GenericNode {type:"Function"}\n            )-[:IS_FUNCTION_OF_CFG]->(\n                callee:UpstreamNode {type:"CFGEntryNode"}\n            ) // Get all function declarations IN the testcase\n            WHERE ID(tc)=%d\n            WITH tc,func,callee\n            MATCH (tc)<-[:IS_FILE_OF]-(:GenericNode {type:"File"})-[\n                :IS_FILE_OF\n            ]->(:GenericNode {type:"Function"})-[\n                :IS_FUNCTION_OF_CFG\n            ]->(entry:UpstreamNode {type:"CFGEntryNode"})\n            WITH func,callee,entry\n            MATCH (entry)-[:CONTROLS*]->(caller:DownstreamNode)\n            WHERE caller.type IN ["ExpressionStatement","Condition"]\n            WITH func,callee,caller\n            MATCH (caller)-[:IS_AST_PARENT*]->(cexpr:GenericNode {\n                type:"CallExpression"})\n            WHERE NOT (cexpr)<-[:IS_AST_PARENT*]-(\n                :GenericNode {type:"CallExpression"}\n            ) // Dodge nested function calls\n            WITH func,callee,caller,cexpr\n            MATCH (cexpr)-[r:IS_AST_PARENT]->(\n                :GenericNode {type:"Callee",code:func.code}\n            ) // Get all function calls within the testcase\n            WITH callee,caller\n            MERGE (caller)-[\n                intercall:FLOWS_TO\n            ]->(callee) // Connect the callee\'s entry point (head) to WHERE it is called\n            WITH callee,caller,intercall\n            MATCH (callee)-[:CONTROLS*]->(last:DownstreamNode)\n            WITH callee,caller,intercall,last\n            MATCH (last)-[\n                :FLOWS_TO|DOM\n            ]->(exit:DownstreamNode {type:"CFGExitNode"}) // Find the callee\'s exit node\n            WITH caller,intercall,exit\n            MATCH (caller)-[\n                nextrel:FLOWS_TO\n            ]->(\n          
      next:DownstreamNode\n            ) // Find the caller\'s next node IN its control flow graph\n            WHERE next.type<>"CFGEntryNode"\n            WITH DISTINCT caller,intercall,exit,nextrel,next\n            MERGE (exit)-[\n                interreturn:FLOWS_TO {callerid:ID(intercall)}\n            ]->(next) // Connect the callee\'s tail to the caller\'s next node\n            MERGE (caller)-[:SHORTCUT]->(next)\n            // Delete the edge between the function call AND its next step,\n            // so that the control flow graph now goes through the callee AND\n            // returns to the callers next step\n            DELETE nextrel\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(\n                :GenericNode {type:"File"}\n            )-[:IS_FILE_OF]->(\n                :GenericNode {type:"Function",code:"main"}\n            )-[:IS_FUNCTION_OF_CFG]->(main:UpstreamNode {type:"CFGEntryNode"})\n            WHERE ID(tc)=%d\n            WITH DISTINCT main\n            MATCH (main)-[:FLOWS_TO*]->(\n                n1:GenericNode\n            )-[:DEF]->(\n                sym1:GenericNode {type:"Symbol"}\n            ) // Find the initialization of the global variable\n            WHERE NOT LEFT(sym1.code,2)="* "\n                AND NOT sym1.code contains " . 
"\n                AND NOT sym1.code=TOUPPER(sym1.code)\n                AND NOT sym1.code IN ["NULL","L","stdin","& wsaData"]\n                AND NOT (sym1)<-[:DEF]-(:GenericNode {type:"IdentifierDeclStatement"})\n                AND NOT (sym1)<-[:DEF]-(:GenericNode {type:"Parameter"})\n                AND NOT (sym1)<-[:USE]-(:GenericNode)-[\n                    :IS_AST_PARENT*\n                ]->(:GenericNode {type:"Callee",code:sym1.code})\n            WITH n1, sym1\n            MATCH p=(n1)-[:FLOWS_TO*]->(n2:GenericNode)-[:USE]->(\n                sym2:GenericNode {type:"Symbol",code:sym1.code}\n            ) // Find the next node n2 that uses the global variable\n            WHERE n1<>n2 AND NONE(n IN NODES(p)[1..-1]\n            WHERE (n)-[:DEF]->(\n                :GenericNode {type:"Symbol",code:sym1.code})\n            ) // Ensure the value has NOT been modified since n1\n            MERGE (n1)-[:REACHES {var:sym1.code}]->(n2)\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(\n                :GenericNode {type:"File"}\n            )-[:IS_FILE_OF]->(\n                :GenericNode {type:"Function"}\n            )-[:IS_FUNCTION_OF_CFG]->(entry:UpstreamNode {type:"CFGEntryNode"})\n            WHERE ID(tc)=%d\n            WITH DISTINCT entry\n            MATCH (entry)-[:CONTROLS*]->(n:DownstreamNode)\n            WITH DISTINCT n\n            MATCH (n)-[:DEF|USE]->(sym0:GenericNode {type:"Symbol"})\n            WITH DISTINCT sym0\n            MATCH (sym0)<-[r0:DEF]-(expr:DownstreamNode)-[r1:USE]->(\n                sym1:GenericNode {type:"Symbol"}\n            )\n            WHERE expr.type IN ["ExpressionStatement","Condition"]\n                AND sym0.code="* "+sym1.code\n            MERGE (expr)-[r2:DEF]->(sym1)\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(:GenericNode {\n                type:"File"})-[:IS_FILE_OF]->(:GenericNode {\n                
type:"Function"})-[:IS_FUNCTION_OF_CFG]->(entry:UpstreamNode {\n                type:"CFGEntryNode"})\n            WHERE ID(tc)=%d\n            WITH DISTINCT entry\n            MATCH (entry)-[:CONTROLS*]->(n:DownstreamNode)\n            WITH DISTINCT n\n            MATCH (n)-[:IS_AST_PARENT*]->(uop:GenericNode {\n                type:"UnaryOperator",code:"&"})\n            WITH DISTINCT uop\n            MATCH (uop)<-[:IS_AST_PARENT]-(uexpr:GenericNode {\n                type:"UnaryOperationExpression"})-[:IS_AST_PARENT]->(idf:GenericNode {\n                type:"Identifier"})\n            WITH uexpr,idf\n            MATCH (uexpr)<-[:IS_AST_PARENT*]-(expr:DownstreamNode)\n            WHERE expr.type IN [\n                "ExpressionStatement","IdentifierDeclStatement","ForInit","Condition"]\n            WITH expr,idf\n            MATCH (expr)-[:USE]->(adr_sym:GenericNode {\n                type:"Symbol",code:"& "+idf.code})\n            WITH expr,adr_sym,idf\n            MATCH (expr)<-[:FLOWS_TO*]-(def:DownstreamNode)-[:DEF]->(\n                def_sym:GenericNode {type:"Symbol",code:idf.code})\n            WHERE expr<>def AND def.type IN ["IdentifierDeclStatement","Parameter"]\n            MERGE (def)-[rdef:DEF {var:idf.code}]->(adr_sym)\n            MERGE (def)-[dflr:REACHES {var:adr_sym.code}]->(expr)\n            WITH DISTINCT expr\n            MATCH (ptr_sym:GenericNode {type:"Symbol"})<-[:DEF]-(expr)-[:FLOWS_TO*]-(\n                usr:DownstreamNode {type:"ExpressionStatement"})-[:USE]->(\n                star_sym:GenericNode {type:"Symbol",code:"* "+ptr_sym.code})\n            WHERE expr<>usr\n            MERGE (expr)-[sdef:DEF {var:ptr_sym.code}]->(star_sym)\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[:IS_FILE_OF]-(:GenericNode {\n                type:"File"})-[:IS_FILE_OF]->(:GenericNode {\n                type:"Function"})-[:IS_FUNCTION_OF_CFG]->(entry:UpstreamNode {\n                type:"CFGEntryNode"})\n            WHERE 
ID(tc)=%d\n            WITH DISTINCT entry\n            MATCH (entry)-[:CONTROLS*]->(n:DownstreamNode)\n            WITH DISTINCT n\n            MATCH (n)-[:DEF|USE]->(sym:GenericNode {type:"Symbol"})\n            WITH DISTINCT sym\n            MATCH (clr:GenericNode)-[use:USE]->(sym)<-[def:DEF]-(src:DownstreamNode)-[\n                cf:FLOWS_TO*]->(clr)\n            WHERE clr<>src AND NOT (src)-[:REACHES {var:sym.code}]->(clr)\n            MERGE (src)-[ndf:REACHES {var:sym.code}]->(clr)\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[\n                :IS_FILE_OF\n            ]-(:GenericNode {type:"File"})-[\n                :IS_FILE_OF\n            ]->(:GenericNode {type:"Function"})-[\n                :IS_FUNCTION_OF_CFG\n            ]->(entry:UpstreamNode {type:"CFGEntryNode"})\n            WHERE ID(tc)=%d\n            WITH DISTINCT entry\n            MATCH (entry)-[:CONTROLS*]->(caller:DownstreamNode)\n            WHERE caller.type IN ["ExpressionStatement","Condition"]\n            WITH DISTINCT caller\n            MATCH (caller)-[:IS_AST_PARENT*]->(cexpr:GenericNode {\n                type:"CallExpression"})\n            WHERE NOT (cexpr)<-[:IS_AST_PARENT*]-(:GenericNode {type:"CallExpression"})\n            WITH caller,cexpr\n            MATCH (caller)-[calrel:FLOWS_TO]->(callee:UpstreamNode {\n                type:"CFGEntryNode"})\n            WITH caller,cexpr,calrel,callee\n            MATCH (cexpr)-[:IS_AST_PARENT]->(\n                arglst:GenericNode {type:"ArgumentList"}\n            )-[:IS_AST_PARENT]->(\n                arg:GenericNode {type:"Argument"}\n            )-[:USE]->(sym:GenericNode {type:"Symbol"})<-[:USE]-(\n                caller\n            )<-[df0:REACHES]-(src:DownstreamNode)-[:DEF]->(sym)\n            DELETE df0\n            WITH caller,calrel,callee,arg,src\n            MATCH (callee)-[:FLOWS_TO|CONTROLS]->(\n                param:DownstreamNode {type:"Parameter",childNum:arg.childNum}\n            
)-[:DEF]->(sym:GenericNode {type:"Symbol"})\n            WITH caller,calrel,callee,src,param,sym\n            MERGE (src)-[:REACHES {var:sym.code}]->(param)\n            WITH caller,calrel,callee,param,sym\n            MATCH (param)-[rpr:REACHES]->()\n            set rpr.src=sym.code\n            WITH caller,calrel,callee\n            MATCH (callee)-[:CONTROLS*]->(ret:DownstreamNode {type:"ReturnStatement"})\n            WHERE callee<>ret\n            MERGE (ret)-[:REACHES {callerid:ID(calrel)}]->(caller)\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[\n                :IS_FILE_OF\n            ]-(:GenericNode {type:"File"})-[\n                :IS_FILE_OF\n            ]->(:GenericNode {type:"Function",code:"main"})-[\n                :IS_FUNCTION_OF_CFG\n            ]->(main:UpstreamNode {type:"CFGEntryNode"})\n            WHERE ID(tc)=%d\n            WITH DISTINCT main\n            // Find sources (start of data flow on the control flow path)\n            MATCH (main)-[:FLOWS_TO*]->(src:GenericNode)\n            WHERE (src)-[:REACHES]->() AND NOT (src)<-[:REACHES]-()\n            WITH DISTINCT src\n            // Find destinations (end of dataflow)\n            MATCH pm1=(src)-[r1:REACHES*]->(dst:GenericNode)\n            WHERE NOT (dst)-[:REACHES]->()\n                AND ALL(idx IN RANGE(1,SIZE(r1)-1)\n                // Ensure we follow the same variable\n                WHERE r1[idx-1].var IN [r1[idx].var, r1[idx].src])\n            WITH DISTINCT src, dst, r1[-1].var AS var, pm1\n                ORDER BY LENGTH(pm1) // Order paths by length\n            // Group path by source, destination AND variable\n            WITH DISTINCT src, dst, var, COLLECT(pm1) AS paths\n            UNWIND RANGE(0, SIZE(paths)-2) AS idx\n            WITH src, dst, paths[idx] AS shorter, paths[idx+1] AS longer\n            // Check if the shorter path is a subset of the longer path\n            WHERE ALL(n IN NODES(shorter) WHERE n IN NODES(longer))\n           
 // Retrieve extraneous relationship / shortcuts IN the shorter path\n            WITH src, dst, FILTER(\n                r IN RELATIONSHIPS(shorter) WHERE NOT r IN RELATIONSHIPS(longer)\n            ) AS xr\n            // Delete the shortcuts\n            FOREACH(r IN xr | DELETE r)\n        ', '\n            MATCH (tc:GenericNode {type:"Testcase"})<-[\n                :IS_FILE_OF\n            ]-(:GenericNode {type:"File"})-[\n                :IS_FILE_OF\n            ]->(:GenericNode {type:"Function",code:"main"})-[\n                :IS_FUNCTION_OF_CFG\n            ]->(main:UpstreamNode {type:"CFGEntryNode"})\n            WHERE ID(tc)=%d\n            WITH DISTINCT main\n            MATCH (main)-[:FLOWS_TO*]->(:GenericNode)-[r1:REACHES]->(n:GenericNode)-[\n                r2:REACHES]->()\n            WHERE EXISTS(r1.size) AND NOT EXISTS(r2.size)\n              AND r1.var IN [r2.var, r2.src]\n            SET r2.size=r1.size\n            WITH DISTINCT n, r1, r2\n            MATCH p=(n)-[r2]->(:GenericNode)-[:REACHES*]->(:GenericNode)\n            WHERE ALL(idx IN RANGE(1, SIZE(RELATIONSHIPS(p))-1)\n              WHERE NOT EXISTS(RELATIONSHIPS(p)[idx].size)\n                AND RELATIONSHIPS(p)[idx-1].var IN [RELATIONSHIPS(p)[idx].var,\n                    RELATIONSHIPS(p)[idx].src])\n            UNWIND RELATIONSHIPS(p) AS r3\n            SET r3.size=r1.size\n        ']
class bugfinder.processing.interproc.InterprocProcessing(dataset, deprecation_warning=None)

Bases: Neo4J3Processing

assign_ports()

Randomly assign ports on the machine.

configure_command(command)

Configure the command to be sent to the container. Needs to be implemented by the subclass.

Parameters

command

configure_container()

Set up container variables.

configure_container_with_dict(container_config)

Configure the given container manually. Needs to be implemented by the subclass.

Parameters

container_config

interproc_cmds_post = []
interproc_cmds_pre = []
interproc_cmds_tc = []
log_input = None
log_output = None
send_commands()

Send commands to the container.

bugfinder.processing.interproc.interproc_worker(progress_bar, cmds, tcid, q, port)