#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "LogSystem.h"

#include <algorithm>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

namespace {

/// Structural fingerprint of one function inside the call graph.
///
/// Two functions with equal fingerprints have the same number of callers and
/// callees, the same depth, and the same sorted multiset of neighbour depths.
struct FunctionSignature {
  unsigned inDegree;                   ///< Number of distinct callers.
  unsigned outDegree;                  ///< Number of distinct callees.
  unsigned depth;                      ///< Depth recorded for the function itself.
  std::vector<unsigned> callerDepths;  ///< Depths of the callers, sorted ascending.
  std::vector<unsigned> calleeDepths;  ///< Depths of the callees, sorted ascending.

  FunctionSignature() : inDegree(0), outDegree(0), depth(0) {}

  bool operator==(const FunctionSignature &other) const {
    return inDegree == other.inDegree &&
           outDegree == other.outDegree &&
           depth == other.depth &&
           callerDepths == other.callerDepths &&
           calleeDepths == other.calleeDepths;
  }
};

/// Node of the function call graph.
struct CallNode {
  std::string name;                ///< Function name (also the key in the graph map).
  bool isTarget;                   ///< True when the function belongs to the target project.
  std::set<std::string> callers;   ///< Names of functions that call this one.
  std::set<std::string> callees;   ///< Names of functions this one calls.
  unsigned depth;                  ///< Depth in the call tree.

  CallNode() : name(""), isTarget(false), depth(0) {}
  CallNode(std::string n, bool target = false)
      : name(std::move(n)), isTarget(target), depth(0) {}
};

/// Module pass that builds a call graph over all defined functions, splits it
/// into "target" and "cover" functions (by `project_source` metadata), prints
/// both sub-graphs as Mermaid diagrams to `errs()`, and logs cover functions
/// whose call-graph neighbourhood resembles that of a target function.
///
/// The pass only analyses; it never modifies the module.
struct CodeFusionPass : public ModulePass {
public:
  static char ID;

  CodeFusionPass() : ModulePass(ID) {}

  /// Runs the whole analysis for module \p M.
  /// \returns false, because the IR is never changed.
  bool runOnModule(Module &M) override {
    auto &logger = logging::LogSystem::getInstance();
    logger.setGlobalLevel(logging::LogLevel::DEBUG);

    LOG_INFO("runOnModule", "Starting analysis for module: {0}", M.getName().str());

    // The containers are members of the pass object; drop whatever a previous
    // invocation left behind so that modules do not contaminate each other.
    callGraph.clear();
    targetFunctions.clear();
    maxCallDepths.clear();

    // Identify all target functions.
    for (Function &F : M) {
      if (!F.isDeclaration() && isTargetCode(F)) {
        targetFunctions.insert(F.getName().str());
      }
    }

    // Build the complete call graph.
    buildCallGraph(M);

    // Compute the call depth of every function.
    calculateCallDepths();

    // Emit the call graph visualisation.
    generateCallGraphs();

    // Look for structurally similar functions.
    findSimilarStructures();

    return false;
  }

private:
  std::map<std::string, CallNode> callGraph;      ///< Function name -> graph node.
  std::set<std::string> targetFunctions;          ///< Names of all target functions.
  std::map<std::string, unsigned> maxCallDepths;  ///< Function name -> largest distance to any target.

  /// Returns the depth recorded for \p name, or 0 when none was recorded.
  /// Unlike `maxCallDepths[name]` this never inserts into the map.
  unsigned lookupDepth(const std::string &name) const {
    auto it = maxCallDepths.find(name);
    return it == maxCallDepths.end() ? 0u : it->second;
  }

  /// Closeness of two non-negative counts in [0, 1]:
  /// `1 - |a - b| / max(a, b)`. Two zeros are identical and score 1.0, which
  /// avoids the 0/0 (NaN) the plain formula would produce.
  static double relativeCloseness(unsigned a, unsigned b) {
    const unsigned larger = std::max(a, b);
    if (larger == 0) {
      return 1.0;
    }
    const unsigned gap = larger - std::min(a, b);
    return 1.0 - double(gap) / double(larger);
  }

  /// Populates `callGraph` with one node per defined function in \p M and
  /// one caller/callee edge per direct call between defined functions.
  ///
  /// NOTE(review): only `CallInst` with a statically known callee is
  /// recorded; `invoke` instructions and indirect calls are not visited.
  void buildCallGraph(Module &M) {
    LOG_INFO("buildCallGraph", "Building complete call graph");

    // Create a node for every defined function.
    for (Function &F : M) {
      if (!F.isDeclaration()) {
        std::string fname = F.getName().str();
        bool isTarget = targetFunctions.find(fname) != targetFunctions.end();
        callGraph.insert({fname, CallNode(fname, isTarget)});
      }
    }

    // Record the call relationships.
    for (Function &F : M) {
      if (F.isDeclaration()) {
        continue;
      }
      std::string callerName = F.getName().str();

      for (BasicBlock &BB : F) {
        for (Instruction &I : BB) {
          CallInst *CI = dyn_cast<CallInst>(&I);
          if (!CI) {
            continue;
          }
          Function *CalledF = CI->getCalledFunction();
          if (!CalledF || CalledF->isDeclaration()) {
            continue;
          }
          std::string calleeName = CalledF->getName().str();

          // Update both directions of the edge.
          callGraph[callerName].callees.insert(calleeName);
          callGraph[calleeName].callers.insert(callerName);

          LOG_DEBUG("buildCallGraph", "Found call: {0} -> {1}",
                    callerName, calleeName);
        }
      }
    }
  }

  /// For every target function, runs a breadth-first search over the call
  /// graph (following both caller and callee edges) and stores, per reached
  /// function, the largest distance seen across all targets in
  /// `maxCallDepths`.
  void calculateCallDepths() {
    LOG_INFO("calculateCallDepths", "Calculating call depths");

    for (const std::string &targetFunc : targetFunctions) {
      std::map<std::string, unsigned> depths;
      std::queue<std::pair<std::string, unsigned>> queue;
      std::set<std::string> visited;

      queue.push(std::make_pair(targetFunc, 0u));
      visited.insert(targetFunc);

      while (!queue.empty()) {
        std::string currentFunc = queue.front().first;
        unsigned depth = queue.front().second;
        queue.pop();

        depths[currentFunc] = depth;

        auto nodeIt = callGraph.find(currentFunc);
        if (nodeIt == callGraph.end()) {
          continue;  // No node, hence no neighbours to expand.
        }
        const CallNode &node = nodeIt->second;

        for (const std::string &caller : node.callers) {
          // insert().second is true only for names not seen before.
          if (visited.insert(caller).second) {
            queue.push(std::make_pair(caller, depth + 1));
          }
        }

        for (const std::string &callee : node.callees) {
          if (visited.insert(callee).second) {
            queue.push(std::make_pair(callee, depth + 1));
          }
        }
      }

      // Fold this search into the per-function maximum.
      for (const auto &pair : depths) {
        unsigned &best = maxCallDepths[pair.first];
        best = std::max(best, pair.second);
      }
    }
  }

  /// Prints one Mermaid diagram for the target functions and one for the
  /// cover (non-target) functions.
  void generateCallGraphs() {
    LOG_INFO("generateCallGraphs", "Generating complete call graphs");

    // Emit the target project and the cover project separately.
    generateProjectCallGraph("Target Project Call Graph", true);
    generateProjectCallGraph("Cover Project Call Graph", false);
  }

  /// Writes a fenced Mermaid `graph TD` block to `errs()`.
  ///
  /// \param title    Text placed in a Mermaid comment at the top of the graph.
  /// \param isTarget Selects which side is drawn: nodes whose `isTarget` flag
  ///                 equals this value, and edges whose both ends do.
  void generateProjectCallGraph(const std::string &title, bool isTarget) {
    errs() << "```mermaid\n";
    errs() << "graph TD\n";
    errs() << " %% " << title << "\n";

    // Nodes.
    for (const auto &pair : callGraph) {
      const std::string &name = pair.first;
      const CallNode &node = pair.second;

      if (node.isTarget == isTarget) {
        std::string nodeId = sanitizeNodeId(name);
        std::string depth = std::to_string(lookupDepth(name));
        std::string style = node.isTarget ? ":::target" : "";

        errs() << " " << nodeId << "[\"" << name << "\\nDepth: " << depth << "\"]"
               << style << "\n";
      }
    }

    // Edges.
    for (const auto &pair : callGraph) {
      const std::string &name = pair.first;
      const CallNode &node = pair.second;

      if (node.isTarget != isTarget) {
        continue;
      }
      std::string callerId = sanitizeNodeId(name);
      for (const auto &callee : node.callees) {
        auto calleeIt = callGraph.find(callee);
        if (calleeIt != callGraph.end() &&
            calleeIt->second.isTarget == isTarget) {
          std::string calleeId = sanitizeNodeId(callee);
          errs() << " " << callerId << " --> " << calleeId << "\n";
        }
      }
    }

    // Style definition used by the ":::target" class above.
    errs() << " classDef target fill:#f96,stroke:#333,stroke-width:4px\n";
    errs() << "```\n\n";
  }

  /// Computes a `FunctionSignature` for every node, then logs, for each
  /// target function, every cover function whose similarity score exceeds
  /// 0.8.
  void findSimilarStructures() {
    LOG_INFO("findSimilarStructures", "Analyzing call graph similarities");

    // Compute the signature of every function.
    std::map<std::string, FunctionSignature> signatures;
    for (const auto &pair : callGraph) {
      const std::string &name = pair.first;
      const CallNode &node = pair.second;

      FunctionSignature sig;
      sig.inDegree = node.callers.size();
      sig.outDegree = node.callees.size();
      sig.depth = lookupDepth(name);

      // Collect caller depths.
      for (const auto &caller : node.callers) {
        sig.callerDepths.push_back(lookupDepth(caller));
      }
      std::sort(sig.callerDepths.begin(), sig.callerDepths.end());

      // Collect callee depths.
      for (const auto &callee : node.callees) {
        sig.calleeDepths.push_back(lookupDepth(callee));
      }
      std::sort(sig.calleeDepths.begin(), sig.calleeDepths.end());

      signatures[name] = sig;
    }

    // Compare every target function against every cover function.
    for (const auto &targetFunc : targetFunctions) {
      auto targetIt = signatures.find(targetFunc);
      if (targetIt == signatures.end()) {
        continue;
      }
      const FunctionSignature &targetSig = targetIt->second;
      std::vector<std::pair<std::string, double>> similarities;

      for (const auto &pair : callGraph) {
        const std::string &name = pair.first;
        const CallNode &node = pair.second;

        if (!node.isTarget) {
          const FunctionSignature &coverSig = signatures[name];

          // Score the pair.
          double similarity = calculateSignatureSimilarity(targetSig, coverSig);
          if (similarity > 0.8) {  // Similarity threshold.
            similarities.emplace_back(name, similarity);
          }
        }
      }

      // Report the matches.
      if (!similarities.empty()) {
        LOG_INFO("findSimilarStructures", "Similar functions for {0}:", targetFunc);
        for (const auto &pair : similarities) {
          const std::string &name = pair.first;
          double similarity = pair.second;
          LOG_INFO("findSimilarStructures", " {0} (similarity: {1:.2f})",
                   name, similarity);
        }
      }
    }
  }

  /// Averages up to five partial scores (in-degree, out-degree, depth,
  /// caller-depth distribution, callee-depth distribution).
  ///
  /// A scalar factor is skipped when both values are zero, and a distribution
  /// factor is skipped unless both vectors are non-empty.
  /// \returns The mean of the factors that applied, or 0.0 when none did.
  double calculateSignatureSimilarity(const FunctionSignature &sig1,
                                      const FunctionSignature &sig2) const {
    double score = 0.0;
    unsigned totalFactors = 0;

    // In-degree and out-degree.
    if (sig1.inDegree > 0 || sig2.inDegree > 0) {
      score += relativeCloseness(sig1.inDegree, sig2.inDegree);
      totalFactors++;
    }

    if (sig1.outDegree > 0 || sig2.outDegree > 0) {
      score += relativeCloseness(sig1.outDegree, sig2.outDegree);
      totalFactors++;
    }

    // Depth.
    if (sig1.depth > 0 || sig2.depth > 0) {
      score += relativeCloseness(sig1.depth, sig2.depth);
      totalFactors++;
    }

    // Caller-depth distribution.
    if (!sig1.callerDepths.empty() && !sig2.callerDepths.empty()) {
      score += compareDepthVectors(sig1.callerDepths, sig2.callerDepths);
      totalFactors++;
    }

    // Callee-depth distribution.
    if (!sig1.calleeDepths.empty() && !sig2.calleeDepths.empty()) {
      score += compareDepthVectors(sig1.calleeDepths, sig2.calleeDepths);
      totalFactors++;
    }

    return totalFactors > 0 ? score / totalFactors : 0.0;
  }

  /// Scores two depth vectors in [0, 1] as the mean of
  ///   - size similarity:  min(size) / max(size), and
  ///   - value similarity: element-wise closeness over the common prefix,
  ///     divided by max(size) so unmatched elements count as 0.
  ///
  /// Element pairs that are both 0 count as a perfect match; the plain ratio
  /// would be 0/0 there and turn the whole score into NaN. Two empty vectors
  /// score 1.0.
  double compareDepthVectors(const std::vector<unsigned> &v1,
                             const std::vector<unsigned> &v2) const {
    size_t maxSize = std::max(v1.size(), v2.size());
    size_t minSize = std::min(v1.size(), v2.size());

    if (maxSize == 0) {
      return 1.0;
    }

    // Similarity of the vector sizes.
    double sizeSimilarity = double(minSize) / double(maxSize);

    // Similarity of the actual values.
    double valueSimilarity = 0.0;
    for (size_t i = 0; i < minSize; i++) {
      valueSimilarity += relativeCloseness(v1[i], v2[i]);
    }
    valueSimilarity /= double(maxSize);

    return (sizeSimilarity + valueSimilarity) / 2.0;
  }

  /// Returns \p name with every '.', ' ' and '-' replaced by '_' so it can be
  /// used as a Mermaid node identifier.
  std::string sanitizeNodeId(const std::string &name) const {
    std::string id = name;
    std::replace(id.begin(), id.end(), '.', '_');
    std::replace(id.begin(), id.end(), ' ', '_');
    std::replace(id.begin(), id.end(), '-', '_');
    return id;
  }

  /// Returns true when \p F carries `project_source` metadata whose first
  /// operand is the string "Target". Missing metadata, an empty node, a null
  /// operand or a non-string operand all yield false.
  bool isTargetCode(Function &F) {
    MDNode *MD = F.getMetadata("project_source");
    if (!MD || MD->getNumOperands() == 0) {
      return false;
    }
    if (MDString *ProjectStr =
            dyn_cast_or_null<MDString>(MD->getOperand(0).get())) {
      return ProjectStr->getString() == "Target";
    }
    return false;
  }
};

}  // namespace

char CodeFusionPass::ID = 0;
static RegisterPass<CodeFusionPass> X("codefusion", "Code Fusion Pass");