| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- #include "llvm/Pass.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/BasicBlock.h"
- #include "llvm/IR/Instructions.h"
- #include "llvm/IR/LegacyPassManager.h"
- #include "llvm/Transforms/IPO/PassManagerBuilder.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Analysis/CFG.h"
- #include "llvm/Analysis/CallGraph.h"
- #include "LogSystem.h"
- #include <vector>
- #include <map>
- #include <set>
- #include <queue>
- #include <algorithm>
- #include <string>
- using namespace llvm;
- namespace {
/// Structural fingerprint of one function inside the call graph.
///
/// The three scalar fields default to zero and the two depth lists default to
/// empty. Two signatures compare equal only when every field matches.
struct FunctionSignature {
    unsigned inDegree = 0;               ///< Number of callers recorded.
    unsigned outDegree = 0;              ///< Number of callees recorded.
    unsigned depth = 0;                  ///< Depth value assigned to the function.
    std::vector<unsigned> callerDepths;  ///< Depth values of the callers.
    std::vector<unsigned> calleeDepths;  ///< Depth values of the callees.

    FunctionSignature() {}

    /// Field-by-field equality; the scalar fields are checked first.
    bool operator==(const FunctionSignature& other) const {
        if (inDegree != other.inDegree) {
            return false;
        }
        if (outDegree != other.outDegree) {
            return false;
        }
        if (depth != other.depth) {
            return false;
        }
        if (callerDepths != other.callerDepths) {
            return false;
        }
        return calleeDepths == other.calleeDepths;
    }
};
/// Node of the function call graph.
struct CallNode {
    std::string name;                 ///< Function name; empty for a default-built node.
    bool isTarget = false;            ///< Flag supplied at construction.
    std::set<std::string> callers;    ///< Names of functions that call this one.
    std::set<std::string> callees;    ///< Names of functions this one calls.
    unsigned depth = 0;               ///< Depth within the call tree.

    /// Builds an unnamed, non-target node at depth 0.
    CallNode() {}

    /// Builds a node called `n`; `target` defaults to false, depth starts at 0.
    CallNode(std::string n, bool target = false)
        : name(n), isTarget(target) {}
};
- struct CodeFusionPass : public ModulePass {
- public:
- static char ID;
- CodeFusionPass() : ModulePass(ID) {}
- bool runOnModule(Module &M) override {
- auto& logger = logging::LogSystem::getInstance();
- logger.setGlobalLevel(logging::LogLevel::DEBUG);
-
- LOG_INFO("runOnModule", "Starting analysis for module: {0}", M.getName().str());
- // 识别所有目标函数
- for (Function &F : M) {
- if (!F.isDeclaration() && isTargetCode(F)) {
- targetFunctions.insert(F.getName().str());
- }
- }
- // 构建完整的调用图
- buildCallGraph(M);
- // 计算每个函数的调用深度
- calculateCallDepths();
- // 生成调用图的可视化
- generateCallGraphs();
- // 分析相似结构
- findSimilarStructures();
- return false;
- }
- private:
- std::map<std::string, CallNode> callGraph;
- std::set<std::string> targetFunctions;
- std::map<std::string, unsigned> maxCallDepths;
- void buildCallGraph(Module &M) {
- LOG_INFO("buildCallGraph", "Building complete call graph");
- // 初始化所有函数节点
- for (Function &F : M) {
- if (!F.isDeclaration()) {
- std::string fname = F.getName().str();
- bool isTarget = targetFunctions.find(fname) != targetFunctions.end();
- callGraph.insert({fname, CallNode(fname, isTarget)});
- }
- }
- // 分析函数调用关系
- for (Function &F : M) {
- if (!F.isDeclaration()) {
- std::string callerName = F.getName().str();
-
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- Function *CalledF = CI->getCalledFunction();
- if (CalledF && !CalledF->isDeclaration()) {
- std::string calleeName = CalledF->getName().str();
-
- // 更新调用关系
- callGraph[callerName].callees.insert(calleeName);
- callGraph[calleeName].callers.insert(callerName);
-
- LOG_DEBUG("buildCallGraph",
- "Found call: {0} -> {1}", callerName, calleeName);
- }
- }
- }
- }
- }
- }
- }
- void calculateCallDepths() {
- LOG_INFO("calculateCallDepths", "Calculating call depths");
- for (const std::string &targetFunc : targetFunctions) {
- std::map<std::string, unsigned> depths;
- std::queue<std::pair<std::string, unsigned>> queue;
- std::set<std::string> visited;
- queue.push(std::make_pair(targetFunc, 0));
- visited.insert(targetFunc);
- while (!queue.empty()) {
- std::string currentFunc = queue.front().first;
- unsigned depth = queue.front().second;
- queue.pop();
- depths[currentFunc] = depth;
-
- for (const std::string &caller : callGraph[currentFunc].callers) {
- if (visited.find(caller) == visited.end()) {
- queue.push(std::make_pair(caller, depth + 1));
- visited.insert(caller);
- }
- }
- for (const std::string &callee : callGraph[currentFunc].callees) {
- if (visited.find(callee) == visited.end()) {
- queue.push(std::make_pair(callee, depth + 1));
- visited.insert(callee);
- }
- }
- }
- // 更新最大深度
- for (const auto &pair : depths) {
- const std::string &func = pair.first;
- unsigned depth = pair.second;
- maxCallDepths[func] = std::max(maxCallDepths[func], depth);
- }
- }
- }
- void generateCallGraphs() {
- LOG_INFO("generateCallGraphs", "Generating complete call graphs");
- // 分别生成目标项目和掩体项目的调用图
- generateProjectCallGraph("Target Project Call Graph", true);
- generateProjectCallGraph("Cover Project Call Graph", false);
- }
- void generateProjectCallGraph(const std::string &title, bool isTarget) {
- errs() << "```mermaid\n";
- errs() << "graph TD\n";
- errs() << " %% " << title << "\n";
-
- // 添加节点
- for (const auto &pair : callGraph) {
- const std::string &name = pair.first;
- const CallNode &node = pair.second;
- if (node.isTarget == isTarget) {
- std::string nodeId = sanitizeNodeId(name);
- std::string depth = std::to_string(maxCallDepths[name]);
- std::string style = node.isTarget ? ":::target" : "";
-
- errs() << " " << nodeId << "[\"" << name
- << "\\nDepth: " << depth << "\"]" << style << "\n";
- }
- }
-
- // 添加边
- for (const auto &pair : callGraph) {
- const std::string &name = pair.first;
- const CallNode &node = pair.second;
- if (node.isTarget == isTarget) {
- std::string callerId = sanitizeNodeId(name);
-
- for (const auto &callee : node.callees) {
- if (callGraph.at(callee).isTarget == isTarget) {
- std::string calleeId = sanitizeNodeId(callee);
- errs() << " " << callerId << " --> " << calleeId << "\n";
- }
- }
- }
- }
-
- // 添加样式定义
- errs() << " classDef target fill:#f96,stroke:#333,stroke-width:4px\n";
- errs() << "```\n\n";
- }
- void findSimilarStructures() {
- LOG_INFO("findSimilarStructures", "Analyzing call graph similarities");
- // 为每个函数计算特征签名
- std::map<std::string, FunctionSignature> signatures;
- for (const auto &pair : callGraph) {
- const std::string &name = pair.first;
- const CallNode &node = pair.second;
-
- FunctionSignature sig;
- sig.inDegree = node.callers.size();
- sig.outDegree = node.callees.size();
- sig.depth = maxCallDepths[name];
- // 收集调用者深度
- for (const auto &caller : node.callers) {
- sig.callerDepths.push_back(maxCallDepths[caller]);
- }
- std::sort(sig.callerDepths.begin(), sig.callerDepths.end());
- // 收集被调用者深度
- for (const auto &callee : node.callees) {
- sig.calleeDepths.push_back(maxCallDepths[callee]);
- }
- std::sort(sig.calleeDepths.begin(), sig.calleeDepths.end());
- signatures[name] = sig;
- }
- // 比较目标函数和掩体函数的相似度
- for (const auto &targetFunc : targetFunctions) {
- const auto &targetSig = signatures[targetFunc];
- std::vector<std::pair<std::string, double>> similarities;
- for (const auto &pair : callGraph) {
- const std::string &name = pair.first;
- const CallNode &node = pair.second;
-
- if (!node.isTarget) {
- const auto &coverSig = signatures[name];
-
- // 计算相似度得分
- double similarity = calculateSignatureSimilarity(targetSig, coverSig);
-
- if (similarity > 0.8) { // 相似度阈值
- similarities.emplace_back(name, similarity);
- }
- }
- }
- // 输出相似函数
- if (!similarities.empty()) {
- LOG_INFO("findSimilarStructures",
- "Similar functions for {0}:", targetFunc);
- for (const auto &pair : similarities) {
- const std::string &name = pair.first;
- double similarity = pair.second;
- LOG_INFO("findSimilarStructures",
- " {0} (similarity: {1:.2f})", name, similarity);
- }
- }
- }
- }
- double calculateSignatureSimilarity(
- const FunctionSignature &sig1,
- const FunctionSignature &sig2) {
-
- double score = 0.0;
- unsigned totalFactors = 0;
- // 比较入度和出度
- if (sig1.inDegree > 0 || sig2.inDegree > 0) {
- score += 1.0 - std::abs(int(sig1.inDegree) - int(sig2.inDegree)) /
- double(std::max(sig1.inDegree, sig2.inDegree));
- totalFactors++;
- }
-
- if (sig1.outDegree > 0 || sig2.outDegree > 0) {
- score += 1.0 - std::abs(int(sig1.outDegree) - int(sig2.outDegree)) /
- double(std::max(sig1.outDegree, sig2.outDegree));
- totalFactors++;
- }
- // 比较深度
- if (sig1.depth > 0 || sig2.depth > 0) {
- score += 1.0 - std::abs(int(sig1.depth) - int(sig2.depth)) /
- double(std::max(sig1.depth, sig2.depth));
- totalFactors++;
- }
- // 比较调用者深度分布
- if (!sig1.callerDepths.empty() && !sig2.callerDepths.empty()) {
- score += compareDepthVectors(sig1.callerDepths, sig2.callerDepths);
- totalFactors++;
- }
- // 比较被调用者深度分布
- if (!sig1.calleeDepths.empty() && !sig2.calleeDepths.empty()) {
- score += compareDepthVectors(sig1.calleeDepths, sig2.calleeDepths);
- totalFactors++;
- }
- return totalFactors > 0 ? score / totalFactors : 0.0;
- }
/// Compares two depth lists element by element.
///
/// The result is the average of two parts:
///   - size similarity: `min(size) / max(size)`;
///   - value similarity: the sum over the first `min(size)` positions of
///     `1 - |a - b| / max(a, b)`, divided by `max(size)`.
///
/// Two positions that are both 0 count as a perfect match (1.0) instead of
/// evaluating 0/0, and two empty lists are treated as identical.
///
/// @param v1  First depth list.
/// @param v2  Second depth list.
/// @return    Similarity in [0, 1]; never NaN.
double compareDepthVectors(
    const std::vector<unsigned> &v1,
    const std::vector<unsigned> &v2) {

    const std::size_t maxSize = std::max(v1.size(), v2.size());
    const std::size_t minSize = std::min(v1.size(), v2.size());

    // Both lists empty: nothing differs, and dividing by maxSize would be 0/0.
    if (maxSize == 0) {
        return 1.0;
    }

    // First part: how close the list lengths are.
    const double sizeSimilarity =
        static_cast<double>(minSize) / static_cast<double>(maxSize);

    // Second part: how close the values are, position by position.
    double valueSimilarity = 0.0;
    for (std::size_t i = 0; i < minSize; ++i) {
        const unsigned larger = std::max(v1[i], v2[i]);
        if (larger == 0) {
            // Both depths are 0: identical, and the ratio below is undefined.
            valueSimilarity += 1.0;
            continue;
        }
        const unsigned gap = larger - std::min(v1[i], v2[i]);
        valueSimilarity +=
            1.0 - static_cast<double>(gap) / static_cast<double>(larger);
    }
    // Dividing by maxSize (not minSize) penalises unmatched positions.
    valueSimilarity /= static_cast<double>(maxSize);

    return (sizeSimilarity + valueSimilarity) / 2.0;
}
/// Turns a function name into an identifier usable as a diagram node id.
///
/// Every character other than an ASCII letter, an ASCII digit or `_` is
/// replaced with `_`. This covers the '.', ' ' and '-' characters as well as
/// any other punctuation or non-ASCII byte. The length is preserved.
///
/// @param name  Function name.
/// @return      Copy of `name` containing only `[A-Za-z0-9_]`.
std::string sanitizeNodeId(const std::string &name) {
    std::string id = name;
    for (char &c : id) {
        const bool keep = (c >= 'a' && c <= 'z') ||
                          (c >= 'A' && c <= 'Z') ||
                          (c >= '0' && c <= '9') ||
                          c == '_';
        if (!keep) {
            c = '_';
        }
    }
    return id;
}
- bool isTargetCode(Function &F) {
- if (MDNode *MD = F.getMetadata("project_source")) {
- if (MDString *ProjectStr = dyn_cast<MDString>(MD->getOperand(0))) {
- std::string projectName = ProjectStr->getString().str();
- return (projectName == "Target");
- }
- }
- return false;
- }
- };
- }
- char CodeFusionPass::ID = 0;
- static RegisterPass<CodeFusionPass> X("codefusion", "Code Fusion Pass");
|