Background
Several of our projects share a large amount of duplicated code. As a result, Apple's App Store review flagged one of them as a repackaged ("shell") app and rejected it. To keep iterating normally and to ship functionally similar projects, the project's code needs to be deeply obfuscated.
Plan
1. Prepare a word list from the CET-4 and CET-6 vocabularies (names built from random characters would be caught by automated checks); 2. Use clang to extract the class and method names; 3. Build random names from the CET-4/CET-6 words and map each class name and method name to one of them; 4. Apply the obfuscation according to that mapping.
CET-4 / CET-6 word lists
Keep the lowercase words and the capitalized words in separate TXT files.
Install the clang Python bindings
pip install clang --user
Copy the code
Obfuscating class names
Extracting the class names
# encoding: utf-8
import sys
import os
import re
import clang
from clang.cindex import *
from optparse import OptionParser, OptionGroup
def get_tu(source, lang='c', all_warnings=False, flags=[]):
    """Obtain a translation unit from source and language.

    By default, the translation unit is created from source file "t.<ext>"
    where <ext> is the default file extension for the specified language.
    By default it is C, so "t.c" is the default file name.

    Supported languages are {c, cpp, objc}.

    all_warnings is a convenience argument to enable all compiler warnings.

    Raises Exception when lang is not one of the supported languages.
    """
    # `flags` is copied, so the shared default list is never mutated.
    args = list(flags)
    name = 't.c'
    if lang == 'cpp':
        name = 't.cpp'
        args.append('-std=c++11')
    elif lang == 'objc':
        name = 't.m'
    elif lang != 'c':
        raise Exception('Unknown language: %s' % lang)
    if all_warnings:
        args += ['-Wall', '-Wextra']
    return TranslationUnit.from_source(name, args,
                                       unsaved_files=[(name, source)])


def generate_m_file(file_text, result_lines, ret_functions):
    """Append the Objective-C class names declared in file_text to ret_functions.

    NOTE(review): the original body of this function is missing from the
    source this file was recovered from. This implementation is a
    reconstruction based on how the caller uses `ret_functions` (it prints
    them as class names) -- confirm against the original script.
    `result_lines` is accepted for interface compatibility and is not used
    by this reconstruction.
    """
    tu = get_tu(file_text, lang='objc')
    for cursor in tu.cursor.get_children():
        if cursor.kind == CursorKind.OBJC_INTERFACE_DECL:
            ret_functions.append(cursor.spelling)


# Lines matching this pattern are commented out before parsing.
_DIRECTIVE_RE = re.compile(
    r'#import|#include|#ifdef|#ifndef|#define|#endif|@property')


def _preprocess_header(lines):
    """Comment out directive/@property lines; return (text, result_text_lines).

    `text` is the input with every matching line prefixed by '// '.
    `result_text_lines` holds one entry per input line: the original line for
    '#ifdef' and for an '#endif' that closes a counted '#ifdef', and '\n' for
    everything else.
    """
    text_parts = []
    result_text_lines = []
    def_block_count = 0
    for line in lines:
        if _DIRECTIVE_RE.search(line):
            if '#ifdef' in line:
                def_block_count += 1
                result_text_lines.append(line)
            elif '#endif' in line and def_block_count > 0:
                def_block_count -= 1
                result_text_lines.append(line)
            else:
                result_text_lines.append('\n')
            text_parts.append('// ' + line)
        else:
            text_parts.append(line)
            result_text_lines.append('\n')
    return ''.join(text_parts), result_text_lines


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: %s <source_dir>' % sys.argv[0])

    libclangPath = '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    Config.set_library_file(libclangPath)

    # Find all .h files
    source_dir = sys.argv[1]
    h_files = []
    ret_functions = []
    for path, dir_list, file_list in os.walk(source_dir):
        for file_name in file_list:
            if file_name.endswith('.h'):
                h_files.append(os.path.join(path, file_name))

    for f in h_files:
        with open(f, 'r') as header:
            text, result_text_lines = _preprocess_header(header)
        generate_m_file(text, result_text_lines, ret_functions)

    # Class names that must not be obfuscated.
    filter_array = ['xxxx', 'aaaa', 'dddd', 'AppDelegate',
                    'PrefixHeader', 'dddddf', 'aaaadxxx']
    for func_item in set(ret_functions):
        if func_item in filter_array:
            continue
        print(func_item)
Copy the code
`filter_array` lists the class names that should be excluded from obfuscation.
Mapping the extracted class names to new names
#!/usr/bin/env bash
# Build the class-name mapping file: for every class name read from
# $STRING_SYMBOL_FILE, write "<original> <replacement>" to $HEAD_FILE.
TABLENAME=symbols
SYMBOL_DB_FILE="symbols"
STRING_SYMBOL_FILE=./process_class/t.txt
HEAD_FILE=./rename-class/rename_classes.txt
export LC_CTYPE=C

rm -f "$SYMBOL_DB_FILE"
rm -f "$HEAD_FILE"

# rand MIN MAX: print a pseudo-random integer in [MIN, MAX].
function rand() {
    local min=$1
    local max=$(($2 - min + 1))
    # Offset $RANDOM by a 10-digit number before taking the remainder.
    local num=$((RANDOM + 1000000000))
    echo $((num % max + min))
}

# Print a random line (line 10..4200) from one of the three word lists.
function pRnd2() {
    local rnd randint word_file
    rnd=$(rand 10 4200)
    randint=$((RANDOM % 3))
    case "$randint" in
        0) word_file="JAAA.txt" ;;
        1) word_file="JBBB.txt" ;;
        *) word_file="JCCC.txt" ;;
    esac
    # Unquoted on purpose: trims surrounding whitespace like the original.
    echo $(sed -n "${rnd}p" "$word_file")
}

# Suffixes that are carried over to the replacement name. Order matters:
# the first matching entry wins (e.g. "Handler" before "Handle").
my_arr=("Manager" "DataSource" "Helper" "Adapter" "Router" "Handler" "Handle" \
    "Model" "Service" "Item" "Info" "Controller" "Cell" "Button" "View" "Window")

mkdir -p "$(dirname "$HEAD_FILE")"
touch "$HEAD_FILE"

# `read -a` splits each line into words; "$line" is the first word.
# The `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
while read -ra line || [[ -n "$line" ]]; do
    if [[ -n "$line" ]]; then
        suffix=""
        for loop in "${my_arr[@]}"; do
            if [[ $line =~ $loop ]]; then
                suffix=$loop
                break
            fi
        done
        ramdom="CS$(pRnd2)$(pRnd2)${suffix}"
        echo "$line" "$ramdom"
        echo "$line $ramdom" >> "$HEAD_FILE"
    fi
done < "$STRING_SYMBOL_FILE"
Copy the code
`ramdom` (the generated name) can carry a project prefix such as `CS`. `my_arr` lets you define iOS-specific suffixes that are kept at the end of the generated name.
Applying the class-name mapping
#!/bin/bash
# Apply the class-name mapping in rename_classes.txt to a project: first
# rewrite identifiers inside the source files, then rename the files.
# NOTE: `[[:<:]]` / `[[:>:]]` (word boundaries) and `sed -i ""` are
# BSD/macOS syntax.
PROJECT_DIR=`cat ../path.txt`
echo "$PROJECT_DIR"
RENAME_CLASSES=rename_classes.txt

# First, we substitute the text in all of the files.
# Each mapping line "Old New" becomes the sed command
# "s/[[:<:]]Old[[:>:]]/New/g;".
sed_cmd=`sed -e 's@^@s/[[:<:]]@; s@[[:space:]]\{1,\}@[[:>:]]/@; s@$@/g;@' "${RENAME_CLASSES}"`
find "${PROJECT_DIR}" -type f \
    \( -name "*.pbxproj" -or -name "*.pch" -or -name "*.h" -or -name "*.m" -or -name "*.xib" -or -name "*.storyboard" \) \
    -exec sed -i "" "${sed_cmd}" {} +

# rename_matching_files FROM TO EXT_PATTERN
# Rename every file whose name contains the whole word FROM and whose
# extension matches EXT_PATTERN, replacing FROM with TO in the path.
rename_matching_files() {
    local class_from=$1 class_to=$2 ext_pattern=$3
    find "${PROJECT_DIR}" -type f -regex ".*[[:<:]]${class_from}[[:>:]][^\/]*\.${ext_pattern}" -print | egrep -v '.bak$' | \
    while read -r file_from; do
        file_to=`echo "$file_from" | sed "s/\(.*\)[[:<:]]${class_from}[[:>:]]\([^\/]*\)/\1${class_to}\2/"`
        echo mv "${file_from}" "${file_to}"
        mv "${file_from}" "${file_to}"
    done
}

# Now, we rename the .h/.m files
while read -r line; do
    class_from=`echo "$line" | sed "s/[[:space:]]\{1,\}.*//"`
    class_to=`echo "$line" | sed "s/.*[[:space:]]\{1,\}//"`
    # A blank mapping line would otherwise match unrelated files.
    if [ -z "$class_from" ]; then
        continue
    fi
    # .h / .m
    rename_matching_files "$class_from" "$class_to" '[hm]'
    # .xib
    rename_matching_files "$class_from" "$class_to" 'xib'
done < "${RENAME_CLASSES}"
Copy the code
`rename_classes.txt` holds the saved class-name mapping, and the shell script performs the bulk replacement across the project.
Obfuscating methods
Extracting the method names
# encoding: utf-8
import sys
import os
import re
import clang
from clang.cindex import *
from optparse import OptionParser, OptionGroup
def get_tu(source, lang='c', all_warnings=False, flags=[]):
    """Obtain a translation unit from source and language.

    By default, the translation unit is created from source file "t.<ext>"
    where <ext> is the default file extension for the specified language.
    By default it is C, so "t.c" is the default file name.

    Supported languages are {c, cpp, objc}.

    all_warnings is a convenience argument to enable all compiler warnings.

    Raises Exception when lang is not one of the supported languages.
    """
    # `flags` is copied, so the shared default list is never mutated.
    args = list(flags)
    name = 't.c'
    if lang == 'cpp':
        name = 't.cpp'
        args.append('-std=c++11')
    elif lang == 'objc':
        name = 't.m'
    elif lang != 'c':
        raise Exception('Unknown language: %s' % lang)
    if all_warnings:
        args += ['-Wall', '-Wextra']
    return TranslationUnit.from_source(name, args,
                                       unsaved_files=[(name, source)])
def parse_method(node):
    """Return the method name found in node's tokens, or '' if it is filtered.

    The name is taken to be the token that immediately follows the first
    ')' token (the end of the return-type parentheses). It is returned only
    when it is longer than 10 characters and does not start with one of the
    excluded prefixes; otherwise the empty string is returned.
    """
    # filter method name, TODO:
    filter_start_words = ('init', 'set', 'get', 'image', 'view', 'reload',
                          '_', 'will', 'did')
    spellings = [token.spelling for token in node.get_tokens()]
    function = ''
    # Pairing each token with its successor avoids indexing past the end
    # when ')' happens to be the last token.
    for current, following in zip(spellings, spellings[1:]):
        if current == ')':
            function = following
            break
    if len(function) > 10 and not function.startswith(filter_start_words):
        return function
    return ''
# extract_type is a bit mask; examples:
# extract_type = 0x00001: Normal method
# extract_type = 0x00011: Normal method + attribute
# extract_type = 0x01100: Category, Class
# extract_type = 0x10000: Protocol
def parse_symbols(cursor, ret_symbols, extract_type):
    """Append the symbols declared directly inside cursor to ret_symbols.

    NOTE(review): only the signature and the first statement of this
    function survive in the source this file was recovered from. The rest
    is a reconstruction based on the flag legend above (bit 0x00001 selects
    methods, bit 0x00010 selects properties) -- confirm against the
    original script.
    """
    class_children = list(cursor.get_children())
    method_kinds = (CursorKind.OBJC_INSTANCE_METHOD_DECL,
                    CursorKind.OBJC_CLASS_METHOD_DECL)
    for child in class_children:
        if child.kind in method_kinds:
            if extract_type & 0x00001:
                method = parse_method(child)
                # parse_method yields a blank string for filtered names.
                if method and method.strip():
                    ret_symbols.append(method)
        elif child.kind == CursorKind.OBJC_PROPERTY_DECL:
            if extract_type & 0x00010:
                ret_symbols.append(child.spelling)
def extract_symbols(file_text, ret_symbols, extract_type):
    """Parse file_text as Objective-C and collect symbols into ret_symbols.

    Each top-level class, category or protocol declaration is passed to
    parse_symbols, but only when the corresponding bit is set in
    extract_type:

        0x00100  class (@interface)
        0x01000  category
        0x10000  protocol

    If the translation unit cannot be created, the option parser's error()
    is invoked with "unable to load input".
    """
    parser = OptionParser("usage: %prog [options] {filename} [clang-args*]")
    parser.disable_interspersed_args()
    parser.parse_args()

    tu = get_tu(file_text, lang='objc')
    if not tu:
        parser.error("unable to load input")

    # Declaration kind -> bit that must be set in extract_type.
    kind_flags = (
        (CursorKind.OBJC_INTERFACE_DECL, 0x00100),
        (CursorKind.OBJC_CATEGORY_DECL, 0x01000),
        (CursorKind.OBJC_PROTOCOL_DECL, 0x10000),
    )
    for cursor in list(tu.cursor.get_children()):
        for kind, flag in kind_flags:
            if cursor.kind == kind:
                if extract_type & flag:
                    parse_symbols(cursor, ret_symbols, extract_type)
                break
# Extract method names from all .h and .m files in this directory
def traverse_header_files(top_directory, extract_type):
    """Collect symbols from every .h and .m file under top_directory.

    Each file is preprocessed before parsing: lines containing a
    preprocessor directive or @class are commented out, and any text that
    precedes '@interface' on a line is dropped. The preprocessed text is
    passed to extract_symbols together with extract_type.

    Returns a set of the collected symbols (empty when no .h/.m file is
    found).
    """
    regex = re.compile(
        r'#import|#include|#ifdef|#ifndef|#define|#endif|#if|#else|@class')

    source_files = []
    for path, dir_list, file_list in os.walk(top_directory):
        for file_name in file_list:
            if file_name.endswith(('.h', '.m')):
                source_files.append(os.path.join(path, file_name))

    ret_symbols = []
    for f in source_files:
        text_parts = []
        with open(f, 'r') as source_file:
            # preprocess
            for line in source_file:
                if regex.search(line):
                    line = '// ' + line
                else:
                    interface_idx = line.find('@interface')
                    if interface_idx > 0:
                        line = line[interface_idx:]
                text_parts.append(line)
        extract_symbols(''.join(text_parts), ret_symbols, extract_type)
    return set(ret_symbols)
if __name__ == '__main__':
    # Usage: <script> <source_dir> <pods_dir>
    # Prints the method names that are declared in source_dir and do not
    # also appear in any of the filter sets computed below.
    if len(sys.argv) < 3:
        sys.exit('usage: %s <source_dir> <pods_dir>' % sys.argv[0])

    libclangPath = '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    Config.set_library_file(libclangPath)
    source_dir = sys.argv[1]
    pods_dir = sys.argv[2]

    # Extract the method names declared in the project's classes
    source_dir_methods_set = traverse_header_files(source_dir, 0x00101)
    # Extract the properties of the project's classes and protocols
    filter_set_A = traverse_header_files(source_dir, 0x10110)
    # Extract the methods and properties of the project's categories and protocols
    filter_set_B = traverse_header_files(source_dir, 0x11011)
    # Extract the methods and properties of all classes, categories and
    # protocols in the Pods directory
    filter_set_C = traverse_header_files(pods_dir, 0x11111)

    # difference set
    result_set = source_dir_methods_set.difference(
        filter_set_A, filter_set_B, filter_set_C)
    # Sorted so that repeated runs produce the same output order.
    for func_item in sorted(result_set):
        print(func_item)
Copy the code
`source_dir` is the project's source directory and `pods_dir` is the Pods directory. Because Pods contains third-party code, its symbols are excluded from the project's method list, and only the remaining methods are mapped.
Writing the method mapping to a header file as macro definitions
#!/usr/bin/env bash
# Generate methodDefine.h: for every method name read from
# $STRING_SYMBOL_FILE, emit a guarded "#define <method> <replacement>".
STRING_SYMBOL_FILE=./process_method/method_list.txt
HEAD_FILE=./methodDefine.h
export LC_CTYPE=C

rm -f "$HEAD_FILE"

# rand MIN MAX: print a pseudo-random integer in [MIN, MAX].
function rand() {
    local min=$1
    local span=$(($2 - min + 1))
    # $RANDOM yields only 15 bits (0..32767), which cannot cover a span of
    # ~140000 values; combine two draws into a 30-bit number first.
    local num=$(( (RANDOM << 15) | RANDOM ))
    echo $((num % span + min))
}

# pick_word FILE1 FILE2 FILE3: print a random line (line 10..140000) from
# one of the three word lists. Prints an empty line when the chosen file
# has fewer lines than the line number picked.
function pick_word() {
    local rnd
    local files=("$@")
    rnd=$(rand 10 140000)
    # Unquoted on purpose: trims surrounding whitespace like the original.
    echo $(sed -n "${rnd}p" "${files[RANDOM % 3]}")
}

# Random lowercase word.
function pRnd1() {
    pick_word "a.txt" "b.txt" "c.txt"
}

# Random capitalized word.
function pRnd2() {
    pick_word "AAA.txt" "BBB.txt" "CCC.txt"
}

touch "$HEAD_FILE"
{
    echo '#ifndef methodDefine_h'
    echo '#define methodDefine_h'
    echo "//confuse string at `date`"
} >> "$HEAD_FILE"

# `read -a` splits each line into words; "$line" is the first word.
# The `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
while read -ra line || [[ -n "$line" ]]; do
    if [[ -n "$line" ]]; then
        ramdom="$(pRnd1)$(pRnd2)"
        if [[ -z "$ramdom" ]]; then
            # An empty replacement would define the method name to nothing.
            echo "warning: no replacement word found for $line, skipped" >&2
            continue
        fi
        echo "$line" "$ramdom"
        {
            echo "#ifndef $line"
            echo "#define $line $ramdom"
            echo "#endif"
        } >> "$HEAD_FILE"
    fi
done < "$STRING_SYMBOL_FILE"

echo "#endif" >> "$HEAD_FILE"
Copy the code
Importing the header
Import `methodDefine.h` from the project's PrefixHeader, and the method obfuscation is complete.