Survey
* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project
* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project
© 2014 YEKYUNG KIM ALL RIGHTS RESERVED FRAMEWORK FOR ANALYSIS OF ANDROID MALWARE A Thesis Presented to The Graduate Faculty of The University of Akron In Partial Fulfillment of the Requirements for the Degree Master of Science Yekyung Kim December, 2014 FRAMEWORK FOR ANALYSIS OF ANDROID MALWARE Yekyung Kim Thesis Approved: Accepted: _____________________________ Advisor Dr. Kathy J. Liszka _____________________________ Department Chair Dr. Timothy Norfolk _____________________________ Committee Member Dr. Chien-Chung Chan _____________________________ Dean of the College Dr. Chand Midha _____________________________ Committee Member Dr. Yingcai Xiao _____________________________ Dean of the Graduate School Dr. Rex D. Ramsier _____________________________ Date ii TABLE OF CONTENTS Page LIST OF TABLES ............................................................................................................. vi LIST OF FIGURES .......................................................................................................... vii CHAPTER I. ANDROID MALWARE ..................................................................................................1 1.1 Introduction ................................................................................................................1 1.2 General methodologies to detect Android malware...................................................4 1.3 Related research .........................................................................................................6 II. ANDROID MALWARE REVERSE ENGINEERING ..................................................9 III. DROIDDREAM AND DROIDDREAMLIGHT .........................................................17 3.1 DroidDream .............................................................................................................17 3.2 DroidDreamLight .....................................................................................................19 IV. 
ANALYZING ANDROID MALWARE .....................................................................21 4.1 Android Package File ...............................................................................................21 4.2 Structure of AndroidManifest.xml file ....................................................................21 4.3 Detecting DroidDream by analyzing the manifest file ............................................25 iii V. EXPERIMENT .............................................................................................................32 5.1 Detecting similar behaviors in DroidDreamLight ...................................................32 5.2 Naïve Bayesian Classification .................................................................................34 5.3 J48 Classification .....................................................................................................37 VI. CONCLUSION............................................................................................................41 BIBLIOGRAPHY ..............................................................................................................43 APPENDICES ...................................................................................................................46 APPENDIX A. TOPS LIST ...........................................................................................47 APPENDIX B. INSTRUCTION OF ANALYZING DROIDDREAM .........................50 APPENDIX C. INSTRUCTION OF ANALYZING DDL AND TOPS .......................51 APPENDIX D. GENERATEOUTPUT.SH ...................................................................53 APPENDIX E. APKCOUNT.PY ..................................................................................54 APPENDIX F. 
COUNTPARAMETER.PY ..................................................................60 iv LIST OF TABLES Table Page 1 Permissions in DroidDream ............................................................................................27 2 Action name ....................................................................................................................29 3 Category name ...............................................................................................................29 4 Services in DroidDream .................................................................................................30 5 Functions in DroidDream ..............................................................................................31 6 Confusion Matrix from Naïve Bayesian Classification .................................................35 7 Output of Naïve Bayesian Classification .......................................................................36 8 Confusion Matrix from J48 Classification with 5 Parameters .......................................38 9 Confusion Matrix from J48 Classification with 17 Parameters .....................................40 v LIST OF FIGURES Figure Page 1 Rapid growth of mobile malware samples between 2012 and 2013 ..............................2 2 Dex2jar commands on Linux and Windows ...................................................................11 3 JD-GUI make possible to read .jar .................................................................................12 4 APK file overview .........................................................................................................21 5 Structure of Android Manifest file .................................................................................22 6 Framework for analyzing DroidDream ..........................................................................26 7 17 parameters after analyzing DroidDream 
...................................................................32 8 Example of data file .......................................................................................................33 9 Result of J48 Decision Tree with 5 Parameters .............................................................39 10 Result of J48 Decision Tree with 17 Parameters .........................................................40 vi CHAPTER I ANDROID MALWARE 1.1 Introduction Mobile computing devices such as smartphones and tablets are becoming increasingly popular. Android device was seeing 550,000 activations per day [1] and 675 million smartphone sold in 2012. Moreover 1.875 billion mobile phones to be sold in 2013 and 1 billion units will be smartphones according to Gartner [2]. The Android platform will continue to benefit from this popularity, with sales of Android phones in 2014 approaching the billion mark [3]. One of the reason why Android OS is over popular than iOS and other mobile platforms is that it is open source. The open source means that after Google create the Android OS kernel, phone seller can extend their OS based on the Android OS kernel, and app-developers are free to create apps unfettered by restrictive rules. It attracts not only regular developers, but also malware developers, too. These Android applications from unknown authors can share most sensitive user information from a mobile device without the user’s knowledge. Especially malware writers are using these shared information to maximize their profit. 1 Mobile malware has already become a serious concern. Over the past year this security issues has received as much attention as the Android market growth. According to Juniper Networks Mobile Threat Center (JN-MTC), “Rapid mobile malware growth and increased sophistication of cyber criminals, turning attacks into an increasingly profit-driven business. 
Mobile malware threats growing at a rapid rate of 614 percent to 276,259 total malicious apps, demonstrating an exponentially higher cyber-criminal interest in mobile devices” [4]. Figure 1 shows this rapid growth of mobile malware between March 2012 and March 2013. Attackers are maximizing their return on investment by focusing on Android, which is an open source platform. This trend proves that more attackers are shifting to mobile. Figure 1. Rapid growth of mobile malware samples between 2012 and 2013 JN-MTC also explains several mobile malware trends, such as: Malware for the Android operating system has increased at a staggering rate since 2010, growing from 24 percent of all mobile malware that year to 92 percent by March 2013. 2 Attackers are finding more effective distribution methods, such as shortening the supply chain or using regulated third-party app marketplaces, to spread malware more quickly. 73% of all known malware exploits holes in mobile payments. Very skilled attackers are also developing botnets for mobile devices and targeted attacks on data on corporate networks. The majority of Android users keep an old version of the Android OS on their devices, which leaves them exposed even to known threats. Not only malicious apps, but also several legitimate free applications pose a risk of leaking corporate or personal data on devices. Free applications requesting and gaining access to account information are common. The first Android Trojan was discovered in 2010, and in 2011 DroidDream, DroidKungFu, and Plankton were seen in the market, according to McAfee’s white paper [5]. The first Trojan was Fake Player, discovered acting as a fake media player application that sends SMS messages to premium-rate numbers without the phone user’s authorization. The user unintentionally ends up paying premium SMS charges. At that time the risk to users was limited because its distribution was limited. Geinimi was the most sophisticated malware at the end of 2010.
It disguised itself as a legitimate application, and it used encryption and obfuscation. Its source code appears to have botnet capabilities. After the first quarter of 2011, DroidDream was introduced; it obtained root privilege on an Android device in order to have full control of the smartphone. It gathers personal information from the device like the phone number and IMEI, and it will send that data to a remote server. Unlike DroidDream, DroidDreamLight can be executed without user intervention. It will collect information related to the device and it will send the data to a remote server. 3 Once the device is identified, the malware will try to download and install new applications. Similar to DroidDreamLight, DroidKungFu collects information about the device, and installs a second application that can download more malicious software onto the device. This paper aims to provide a framework for analyzing Android malware and for detecting similar behavior between malware families. The rest of chapter 1 explains general methodologies to detect Android malware and introduces related research. Chapter 2 explains Android malware reverse engineering with several tools: not only the tools we use in this paper, but also other tools we researched for reverse engineering. This additional information would benefit others who want to expand on similar Android malware research. In chapter 3 we briefly explain two Trojan malware families, DroidDream and DroidDreamLight, focusing on their behavior and history. In chapters 4 and 5 we propose a detection method to find similar behavior between DroidDream and DroidDreamLight. This research could be a first step towards recognizing, predicting, and preventing the threat posed by Android malware and could be a reference for further studies. 1.2 General methodologies to detect Android malware Mobile malware can be analyzed using static analysis and dynamic analysis.
While the dynamic analysis is commonly used for the desktop computer to study malware, the static analysis is commonly used for the mobile device, since malicious samples are restricted, and usually it will provide more accurate data than the replication of the malicious application in a test environment. 4 In static analysis, Android application will come as an Android application package (APK) file, which is built on the .zip file format. To extract all the components inside an APK file, tools can be used to decompress the initial file such as Winzip, Winrar, and 7-zip. Once the files and folders are extracted, the static analysis will be focused on two components, AndroidManifest.xml and the classes.dex. AndroidManifest.xml comes in a binary format, but it can transform from a binary into a text file using a tool like AXMLPrinter2.jar. Once the file is decrypted, it is important to analyze it to see if the permissions of the application is related to the ones that supposedly are required. For example, it is not usual for a GPS application to require permissions to read phone’s contact information because this is not related to a known functionality of the application. Other important component of the Manifest is the section where the application components are defined. Usually, if the application is not totally malicious, the activity defined in the Manifest corresponds to the original clean application because it will show a graphical interface. However, most of the malware nowadays modifies the Manifest to include a malicious service that will run in the background without the user’s knowledge. Once the AndroidManifest.xml is analyzed, the next component of the APK file to be analyzed is the file classes.dex. It can be analyzed using two different methodologies: decompiling and disassembling the dex file. Decompiling consists of performing the reverse operation of the compiler to translate the executable program into the original source code. 
The first step is to convert the dex file into a Java Archive (JAR) file using the tool Dex2Jar. Once the JAR file is obtained, a Java decompiler can be used to get an approximation of the original source 5 code of the application. However, the Java code obtained is not exactly the same as the original source code; sometimes the decompiler output has flawed logic and shows disassembled code that cannot be fully reversed into human-readable Java code. Android has a native disassembler called dexdump, which can be found in the folder Android/platform-tools. However, the output of this tool is not user-friendly because the code is not dumped entirely and some instructions are not fully disassembled. Dedexer, baksmali, and IDA Pro are also free tools that produce more readable output. Once the output is generated by one of the tools, a basic understanding of Java and Dalvik bytecode is necessary to understand the purpose of the code. 1.3 Related research Many researchers have studied how to analyze and detect Android malware prior to its installation, and their work can be categorized into two methods: static and dynamic analysis. Static analysis approaches focus on comparing programs with known malware, based on the program code, looking for signatures. This signature-based method, which extracts signatures from malware samples, is one of the popular approaches. In [6] the authors detect malware at the network level. They observe network traffic originating from a sample application and try to detect malware by comparing it with DNS-based and IP-address-based blacklists. However, it also cannot detect unknown malware because the detection relies on blacklists of well-known malicious websites. In [7] the authors present a method to analyze attributes of files in an unknown sample application. While this research can detect some unknown malware, the analysis cost depends on the number 6 of files in the sample application.
[8] proposed a lightweight method to block the installation of applications that have dangerous permissions or intent filter combination. However, the method may lead to incorrect detection, because the method is not sufficient to differentiate malware from benign applications. In [9] proposed DroidMat which provide a static analysis system for detecting Android malware. They obtained some attributes such as Permission, Activity, Service, Receiver, and API calls from manifest and dex files using smali code. DroidMat can discriminate between malware and benign applications. However, the cost of analyzing depends on the size and numbers of files. Also in other static analysis approaches such as Kirin[8], Stowaway[10] , and RiskRanker[11], they has only a small run-time overhead. While these approaches are efficient and scalable, they mainly build on manually crafted detection patterns which are often not available for new malware instances. Dynamic analysis for Android malware application is performed by executing programs on a real or virtual Android machine. TaintDroid [12], DroidRanger [13] and DroidScope [14] are methods that can monitor the behavior of applications at run time. Although very effective in identifying malicious activity, run time monitoring suffers from a significant overhead and cannot be directly applied on mobile devices. Although Android OS has full operating system functionality, there are some drawbacks such as limited resources. In [15] [16] [17] they approach using machine learning and data mining approaches for a traditional malware detection. In [15] they train several machine learning algorithms on byte string n-grams. In [16] they compare three machine learning algorithms trained on three features such as DLL and system calls made by the program, 7 strings found in the program binary, and a raw hexadecimal representation of the binary. In [18] they train a neural network to detect boot sector viruses, based on bytestring trigrams. 
Chapter 2 explains about Android malware analysis tools to provide a reverse engineering, debugging, monitoring and generally emulating of the Android structure and behavior. 8 CHAPTER II ANDROID MALWARE REVERSE ENGINEERING Reverse Engineering is a process of analyzing an existing code or piece of software in order to scrutinize the software for any vulnerability or any errors. It has the ability to generate the source code from an executable. This technique is used to examine the functioning of a program or to evade security mechanisms in general computing security areas. From the beginning of 2009 Android malware reverse engineers began devising ways to reverse the Dalvik bytecode. The application which pre-compiled binary format is distributed in Android market and it is not possible to debug the source code directly. We need a disassembler that convert or reverse the Dalvik Bytecode into readable format. Undx[19] released in 2009, it could generate a JAR file from an APK file, and then it could converted to JAVA using JAD and JD-GUI. However, undx has a limitation with complex Dalvik Bytecode. Then, Dex2Jar [23] can deal with the complex code, and researcher have used it extensively to convert Dalvik Bytecode. The binaries for Dalvik Virtual Machines are in the .dex file format. Backsmali is a disassembler that is used for .dex files in Dalvik VM. Moreover Android-apktool is used for changing the source code and repackage it. Androguard is a reverse engineering, malware and good-ware analysis of Android applications written in python. Androguard 9 could be modified its python code by researcher for a specific research purpose easily, as well as it shows detailed information about the Android application. Besides no matter what your research on static or dynamic analysis categories, there are many other tools which help to reversed various applications. Next we explain common tools used in this study as well as others not used but are present for future work. 
Androguard Androguard1 is a reverse engineering, malware and good-ware analysis of Android applications and it is written in python to play with Dex/Odx, APK file, Android’s Binary XML and Android Resources(arsc) for Linux, OSX and Windows platform. This is useful for a static analysis of Android malware and it supports dex, apk, arsc, and xml file format. In Chapter 4 we use Androguard, mainly Androapkinfo.py, to extract all necessary information from APK files, because it is fast and easy to adapt a customized python code. The following are python scrips of Androguard. Androapkinfo.py display information such as permission, service, activities, receivers, usage of native code on an APK file. Androsign.py helps you to create your own signatures in order to add them in the database. Therefore it is easier after an analysis to isolate which parts are the most interesting to add in the database in order to detect the malware. The Androaxml.py is for transforming Android’s binary XML, AndroidManifest.xml, into human readable classic XML file. 1 http://code.google.com/p/androguard/ 10 Androdd.py is used to output graphs for each method or each class of an Android packages. Androdiff is used to compare or display the differences between two apps. Androsim.py is used to get the similarities between two apps. Androlyze.py kick off an interactive shell to perform all functionality of androguard available such as decompiling APK files, searching for permissions, or showing a control flow graph. Dex2jar It is intended to convert .dex files to human readable .class files in java. After downloading Dex2jar2, it should extract the zip file to a folder. Dex2jar will generate a file named such as someApk-dex2jar.jar(APK-jar) in the working folder. Figure 2 shows Linux and Windows commands to convert a .dex file into APK-jar file named someApk.apk. Figure 2. Dex2jar commands on Linux and Windows After convert APK-jar file, the source could be readable using JD-GUI or JAD. 
Figure 3 shows that APK-jar file is readable with a JD-GUI. The reversing process of non-obfuscated application shows the names of variables and methods are preserved and everything seems original code. However if the application is obfuscated one, the variables presents name such as ‘a’, ‘b’, ‘c’ and also the methods name are changed or 2 https://code.google.com/p/dex2jar/ 11 disappear, because they have been collapsed inside other methods by the optimization process. Figure 3. JD-GUI make possible to read .jar Dex2jar also has the ability to modify the code of an APK file. First, it translate the code from .dex to .jar file. Second, modify .class files in the .jar. Third, it translate .jar back to .dex and put into APK file. Fourth, the APK file is signed and finished the modification. Smali/Baksmali Smali/Baksmali3 is an assembler/disassembler for Android’s dex format used by dalvik, Android’s Java VM implementation. The syntax is loosely based on Jasmin’s dedexer’s syntax, and supports the full functionality of the dex format such as annotations, debug info, and line info. This tool is based on Linux platform and prerequisites JDK and 3 https://code.google.com/p/smali/ 12 Git before downloading smali files. After making Git repository on the linux machine smali can build with smali.jar and baksmali.jar files. Android-apktool Android-apktool4 is a tool for reverse engineering third party, closed, binary Android apps. It can decode resources to nearly original form and rebuild them after making some modification; it makes possible to debug smali code step by step. Also it makes working with app easier because of project-like files structure and automation of some repetitive tasks like building APK file. It brings the capability of reverse engineering Android APK codes using Java runtime environment. Due to Java portability this tool is usable in Windows, Linux and OSX operating system. 
Java Decompiler Java Decompiler5 are several tools in order to decompile and analyze Java 5 “byte code” and the later versions. JD-Core is a library that reconstructs Java source code from one or more .class files. JD-Core may be used to recover lost source code and explore the source of Java runtime libraries. JD-GUI and JD-Eclipse include JD-Core library. JDGUI is a standalone graphical utility that displays Java source codes of .class files. You can browse the reconstructed source code with the JD-GUI for instant access to methods and fields. JD-Eclipse is a plug-in for the Eclipse platform. It allows you to display all the Java sources during your debugging process, even if you do not have them all. JDIntelliJ is a plug-in for the IntelliJ IDEA. 4 5 http://code.google.com/p/android-apktool/ http://jd.benow.ca/ 13 Android-x86 Android-x866 provides a ready-to-use virtual machine disk which can be simply mounted and used to run original Android on VirtualBox. The advantage of this method is that the experience is 99% like an actual Android device, but with higher processor performance, physical memory and storage. Android Reverse Engineering(ARE) ARE7 is a comprehensive ready-to-use Android Reverse Engineering(A.R.E.) Virtual Machine to work with Android. Unlike Android-x86 this is a tool set for Android Reverse Engineering which contains some of the necessary tools. Tools currently included on A.R.E are Androguard, Android sdk/ndk, APKInspector, Apktool, Axmlprinter, Ded, Dex2jar, DroidBox, Jad, Smali/Baksmali APKinspector APKinspector8 is another python based tool which provides a GUI tool to aide analysis and reverse engineering of compiled Android packages and their DEX code. Davlik Retargeting (Dare) Dare9 retargets DEX and APK Android applications to raw .class files. You will need additional tool for further reverse engineering. 
6 http://www.android-x86.org/ https://redmine.honeynet.org/projects/are 8 http://code.google.com/p/apkinspector/ 7 9 http://siis.cse.psu.edu/dare/index.html 14 Droidbox Droidbox10 is a sandbox which offers dynamic analysis of Android applications. These types of analysis mainly used to know the malware at first place; just to get some clues about the behavior of the malware. This tool provides network traffics, accessed files, accessed services, data leakages, circumvented permissions, cryptography operations and SMS/Phone calls of the malware. Dexter Dexter11 is a static android application analysis tool with a web-based user interface. This tool is good for a beginner who want to see an overview of APK file. The tool extracts as much as information as possible from either legitimate or malicious application’s APKs and displays them in various different views. However it is difficult to extract specific information and to narrow down its relationship. To analyze an android application file you need to upload an android application file to Dexter and it will automatically generate several different views about the application components like packages, classes, and the methods. After uploading an Android APK to Dexter, the import process is triggered and extract all the data and meta information from the file and store them in a database. Additionally, it start ‘autotagging’ process that goes through the classes and methods and looks at used strings, names, called functions and other information to automatically annotate these objects with tags and comments. This can provide you with a good starting point for conducting further analysis. The detection is based on heuristics so it is by no means complete and perfect. 10 11 http://code.google.com/p/droidbox/ http://dexter.dexlabs.org/static/docs/ 15 However it generally works nicely and can tell you a lot about APKs functionality on first sight. 
16 CHAPTER III DROIDDREAM AND DROIDDREAMLIGHT 3.1 DroidDream Android Malware DroidDream is a mobile botnet type of malware that appeared in spring 2011[20]. This is the first Android malware which uses an exploit to gain root permissions in order to access unique identification information for the phone. Once compromised, a phone infected by DroidDream could also download additional malicious programs without the user’s knowledge as well as open the phone up to control by hackers. DroidDream got its name from the facts that the authors of DroidDream set the package name to include the string “com.droiddream” and also it was set up to run between the hours of 11:00 p.m. and 8:00 a.m. when users were most likely to be sleeping and their phones less likely to be in use. Additional variants of DroidDream have since appeared, including DroidDreamLight in June 2011 and a variant of DroidDreamLight that appeared a month later. DroidDream works in two phases [21] like general botnet malware in Windows. First phase, DroidDream is successful in rooting a device. Rooting is the process of allowing users of a smart phone to attain privileged control such as root access on the phone. DroidDream infects a device by breaking out of Android’s security container and 17 then it installs a second application, DownloadProviderManager.apk, as a system application on the device. Second phase, installing as a system application prevents a user from seeing or uninstalling the application without special permission; predominantly serve to maintain connection to the Command-and-Control (C&C) server to download and install other files. This application was designed to be automatically triggered and it can send additional sensitive information to a remote server and silently download other applications onto the infected device. 
The information are ProductID, Partner, International Mobile Equipment Identify (IMEI), International Mobile Subscriber Identify (IMSI), Model & SDK value, Language, Country, and UserID. The Android operating system contains many security-sensitive data that identify the user’s identification information and user specific setting. IMEI is a unique number that identifies the cell physical phone device. For example, Global System for Mobile(GSM) Communications identify valid phone by using IMEI in its cellular network [22]. IMSI is a unique number securely stored inside the phone’s SIM(subscriber identification module). The number is sent from the phone to the network and identifies the user’s mobile subscription and provider [23]. Besides there are Android ID, Mobile Subscriber Integrated Services Digital Network Number(MSISDN, phone number), contacts list, and SD card. DroidDream then attempts to take an inventory of all the applications it has previously installed. Once DroidDream has communicated its current status to the command and control server, the malware accepts the following commands: NextConnectTime, DownloadUrl, PackageName. Applications supplied by the C&C 18 server can be silently downloaded to an infected device. In the malware, there also appears to be a commands dealing with ratings, comments, assetIDs and install states, all of which relate to the Android Market. Though these appear incomplete, it’s possible the authors intended to listen to Android Market downloads and possible to trigger downloads and comments on downloaded applications. Because we don’t know the C&C server issue commands to download additional applications we cannot divine their exact purpose, however the possibilities are limitless. DroidDream could be considered a powerful zombie agent that can install any applications silently and execute code with root privileges at will. 
3.2 DroidDreamLight DroidDreamLight appeared in May 2011, created by the same developers who brought DroidDream to market in March 2011. Although DroidDreamLight has a similar name to DroidDream, it works differently. It gathers information about the device and tries to upload the data to a list of websites defined by the Trojan. The Lookout Security Team [24] identified DroidDreamLight due to a tip from a developer who reported that modified versions of his app and another developer’s app were being distributed in the Android Market. Malicious components of DroidDreamLight are invoked on receipt of an android.intent.action.PHONE_STATE intent. Therefore it is not dependent on manual launch of the installed application to trigger its behavior. The broadcast receiver immediately launches the <package>.lightdd.CoreService which contacts remote servers 19 and supplies the IMEI, IMSI, Model, SDK Version and information about installed packages. DroidDreamLight is also capable of downloading and prompting installation of new packages, though unlike DroidDream it is not capable of doing so without user intervention. 20 CHAPTER IV ANALYZING ANDROID MALWARE 4.1 Android Package File This chapter suggests a method for detecting Android malware by analyzing the AndroidManifest.xml file in an APK file. An APK file is the package file format used to distribute and install applications on the Android operating system. It contains the Android manifest file, the program’s code in classes.dex files, and the application’s resources in resources.arsc, lib, assets, certificates or res. Figure 4 shows the structure of an APK file. Figure 4. APK file overview 4.2. Structure of AndroidManifest.xml file The manifest file presents essential information about the app to the Android system. (1) It names the Java package for the application, and the package name serves as 21 a unique identifier for the application.
(2) It describes the components of the application – the activities, services, broadcast receivers, and content providers that the application is composed of. It names the classes that implement each of the components and publishes Figure 5. Structure of Android Manifest file their capabilities using Intent messages they can handle. These declarations let the Android system know what the components are and under what conditions they can be launched. (3) It determines which processes will host application components. (4) It declares which permissions the application must have in order to access protected parts of 22 the API and interact with other applications. (5) It also declares the permissions that others are required to have in order to interact with the application’s components. (6) It declares the minimum level of the Android API that the application requires. (7) It lists the libraries that the application must be linked against. Figure 5 shows the general structure of the manifest file and every element that it can contain [25]. Only the <manifest> and <application> elements are required, they each must be present and can occur only once. Elements at the same level are generally not ordered. For example, <activity>, <provider>, and <service>elements can be intermixed in any sequence. In a formal sense, all attributes are optional, however, there are some that must be specified for an element to accomplish its purpose. It mentions a default value or states what happens in the absence of a specification. Many elements correspond to Java objects, including elements for the application and its principal components such as activities <activity>, services <service>, broadcast receivers <receiver>, and content providers <provider>. If you define a subclass, the subclass is declared through a name attribute. The name must include the full package designation. 
For example, a Service subclass might be declared in the following manner: <service android:name="com.example.project.SecretService" … > When starting a component, Android creates an instance of the named subclass, SecretService. If a subclass isn’t specified, it creates an instance of the base class. If more than one value can be specified, the element is almost always repeated, rather than listing multiple values within a single element. For example, an intent filter can list several actions in the following manner: <intent-filter> <action android:name="android.intent.action.EDIT" /> <action android:name="android.intent.action.INSERT" /> <action android:name="android.intent.action.DELETE" /> </intent-filter> The core components of an application, such as activities, services, and broadcast receivers, are activated by intents. An intent is a bundle of information describing a desired action, including the data to be acted on, the category of component that should perform the action, and other pertinent instructions. Android locates an appropriate component to respond to the intent, launches a new instance of the component if one is needed, and passes it the intent object. A permission is a restriction limiting access to a part of the code or to data on the device. The limitation is imposed to protect critical data and code. Each permission is identified by a unique label. If an application needs access to a feature protected by a permission, it must declare that it requires that permission with a <uses-permission> element in the manifest. Then, when the application is installed on the device, the installer determines whether or not to grant the requested permission by checking the authorities that signed the application’s certificates and asking the user. If the permission is granted, the application is able to use the protected features. If not, its attempts to access those features will fail without any notification to the user.
24 4.3 Detecting DroidDream by analyzing the manifest file This Chapter 4 proposes a method for detecting Android malware’s behavior by analyzing permissions, intent-actions, intent-categories, and function names in APK files. Sixteen DroidDream samples were obtained from Android Malware Genome Project [26] that provides samples for research purpose. We transfer from the APK files to human readable information described in DroidDream samples using Android Reverse Engineering tool, Androguard, with bash shell. Based on the output information we can extract keywords of DroidDream. Not every keywords are characterized as keywords of DroidDream. Section 4.3.2 to Section 4.3.5 will discuss why we choose certain keywords as DroidDream parameters. These parameters will be using as attributes of a data mining dataset in Chapter 5. 4.3.1 Extract information from Android Package File Androidapkinfo.py from Androguard extracts important information by elements and make it readable output from APK file. Figure 6 show a framework for analyzing DroidDream in a diagram. To get outputs efficiently from DroidDream samples we create a shell script named GENERATEOUTPUT.sh. This script will read all samples in DroidDream directory, and create all output in OutDroidDream directory. The source code is attached in appendix. The output name will be the same name as the input file with .out extension. If you want to extract other malwares, you can switch DroidDream directory to other directory name such as DroidDreamLight. 25 The output file in OutDroidDream has all information by element such as permission, service, activities, receivers, providers, and usage of native code. To analyze same keywords in DroidDream we create APKCOUNTER.py which is counting same keywords by element in those output files. It generate a list of keyword and count, such as [{‘ACCESS_NETWORK_STATE’, 7}, {‘ACCESS_WIFI_STATE’ ,15 }….. ]. 
Keyword could be any name depends on the names and the count number is a count number for each keyword in the output files. Figure 6. Framework for Analyzing DroidDream Since the results shows over nine hundreds of keywords, we choose only 17 parameters to represent DroidDream’s behavior. The heuristics for DroidDream parameter selection are: Keyword should be related with DroidDream’s behavior Two-thirds majority(supermajority) rule is required in keyword’s count number, therefore the count number should be larger than ten.( 2/3 of 16 is 10.6 ) Exclude common, obfuscated, or lifecycle of application keyword Next section explains how to choose these 17 parameters in permission, intent-filter, service, and function names in details. 26 4.3.2 Permission Each Android application includes a manifest file that lists the permissions requested by the application. When an application is installed, the permission are shown to the user who decides whether to proceed with the installation or to cancel it. There are some important permissions of the Android operating system that define what an application has access to. Table 1 shows permission classes, number of frequency and descriptions for each class which detected frequently in DroidDream malware applications. Table 1. Permissions in DroidDream Permission Class Count Class Description ACCESS_NETWORK_STATE 7 Allow applications to access information about networks. ACCESS_WIFI_STATE 15 Phone Information Internet Allow applications to access information about WiFi Network. CHANGE_WIFI_STATE 15 Allow applications to change WiFi connectivity state. INTERNET 16 Allow applications to open network sockets. READ_CONTACTS, 2 Allow an application read and write access to the mobile WRITE_CONTACTS 2 phone’s contact list. READ_LOGS 4 Allow an application to read the low-level system log files. 
READ_PHONE_STATE 16 Allow read only access to phone state Bolded class names such as ACCESS_WIFI_STATE, CHANGE_WIFI_STATE, INTERNET, READ_PHONE_STATE at Table 1, are chosen as DroidDream parameters which satisfied with two following reasons. First, the permission name should be associated with a behavior of DroidDream. DroidDream has the ability to root the device 27 and send phone information to an external server, therefore basically this malware needs Internet and Phone Information related permissions. Second a count number should be more than ten, 2/3 number of APK files. Permission shows only one time in each APK file. If permission keyword show up more than ten, it means the keywords is a supermajority of DroidDream APK files and it could be a DroidDream parameter. 4.3.3 Intent-filter : activity and category Generally <Activity>, <Services>, <Receiver>, <Provider> elements in AndroidManifest.xml file have <intent-filter> element that has optional intent.action and intent.category tags. These core components of an application activated by intents. An intent is a bundle of information describing a desired action. It has the data to be acted on, the category of component that should perform the action, and other pertinent instructions. Android locates an appropriate component to respond to the intent, launches a new instance of the component if one is needed, and passes it the intent object. Therefore, discovering an action name or a category name in core components could help to detect DroidDream. Table 2 and Table 3 show the result of action and category name after counting the frequency of <intent-filter> element. The information is mainly came from <Activity>, <Service>, and <Receiver>, but not from <Provider>. Like permission element, intent-filter element’s keywords show only one time at each APK file. If each keyword show up more than ten, it means the keywords is a supermajority of DroidDream APK files and it could be a DroidDream parameter. 
However, we didn’t add android.intent.action.MAIN and android.intent.category.LAUNCHER to the parameter list, even though their count numbers are over ten in Tables 2 and 3, because these are common intents in Android applications and these names cannot be an indication of DroidDream. android.intent.action.MAIN is the entry point of the application, i.e. when you launch the application, this activity is created. android.intent.category.LAUNCHER means the activity should appear in the Launcher as a top-level application, i.e. the entry point should be listed in the application launcher. Table 2. Action name Action name Count android.intent.action.BOOT_COMPLETED 7 android.intent.action.MAIN 15 Table 3. Category name Category name Count android.intent.category.DEFAULT 8 android.intent.category.LAUNCHER 15 android.intent.category.OPENABLE 1 4.3.4 Service A service is an application component that can perform long-running operations in the background and does not provide a user interface. Another application component can start a service and it will continue to run in the background even if the user switches to another application. Additionally, a component can bind to a service to interact with it and even perform interprocess communication. This means a service runs in the main thread of its hosting process in an Android application, and a malicious service could be found in this part. In Table 4 there are only two services: com.android.root.AlarmReceiver and com.root.Setting. These two services are added as DroidDream parameters because they decrypt a byte buffer which contains the IP address and the URL of the service that is used to post data about the infected phone [27]. Table 4. Services in DroidDream Service name Count com.android.root.AlarmReceiver 15 com.root.Setting 15 4.3.5 Selected Function name The last part of the output files shows function names and their method information. Table 5 shows the extracted function names and their counts.
Bolded names in Table 5, such as changeWifiState, dopermroot, getIMEI, getIMSI, getRawResource, installSu, isPackageInstalled, onReceive, postUrl, removeExploit, and restoreWifiState, are chosen as DroidDream parameters for two reasons. First, we eliminate functions with very high counts because these are common functions that are frequently used in regular Android applications, such as onClick, onCreate, and onDestroy. We also eliminate functions that have an obfuscated name, like a, b, c, …, m. 30 Second, the function name should be associated with the behavior of DroidDream. For example, getIMEI, getIMSI, and getRawResource can collect the user’s phone information. installSu can get root permission on the phone, and isPackageInstalled can check whether the additional package is installed or not. Table 5. Functions in DroidDream Function name Count Function name Count a, b, c, d, e, f, g, j, k, l, m 24 ~ 433 onClick 131 applyTransformation 32 onCreate 121 changeWifiState 15 onDestroy 19 dopermroot 15 onReceive 23 getIMEI 15 postUrl 15 getIMSI 15 removeExploit 15 getRawResource 15 restoreWifiState 15 installSu 15 shouldOverrideUrlLoading 11 isPackageInstalled 15 onReceive helps to receive additional information or applications from the network. postUrl could be dangerous because it can post to a specific URL. Because DroidDream uses an exploit to gain root permissions, it is suspicious to have a removeExploit function. changeWifiState and restoreWifiState are suspicious because the mobile device can connect to WiFi without the user’s knowledge. However, we eliminate unrelated functions; for example, applyTransformation doesn’t look like a threat at all because it helps to transform an animation image. 31 CHAPTER V EXPERIMENT 5.1 Detecting similar DroidDream behaviors in DroidDreamLight In Chapter 4 we proposed a new technique to extract DroidDream’s parameters from APK files. Using this technique we have already extracted and analyzed 17 DroidDream behavior parameters, which are shown in Figure 7.
Based on these 17 parameters we conduct an experiment to find a similarity between the DroidDream and DroidDreamLight malware families through data mining techniques. Figure 7. 17 Parameters after analyzing DroidDream. 32 To evaluate the performance of the proposed method, we performed a data mining experiment with 46 DroidDreamLight malware application samples (DDL) and 46 top free Android application samples (TOPS). The 46 DDL samples are obtained from the Android Malware Genome Project for research purposes. The 46 TOPS samples are downloaded from the Google Play store [28]. These TOPS were ranked from 1 to 46 among the top free applications on September 18th, 2014, and we use a website called APK downloader [29] to download the applications in APK file format. The TOPS application names and APK file names are attached in Appendix A. Each DDL APK name is a unique MD5 hash value, and the TOPS APK names are irregular, depending on the publisher. To specify DDL or TOPS behavior we add a decision parameter APPS to the 17 parameters; therefore we finally have 18 parameters. This APPS decision parameter takes exactly one of two possible values, DDL or TOPS. Figure 8. Example of data file. The Weka [30] data mining software needs the ARFF or CSV data file format to analyze the dataset. We create a script, COUNTPARAMETER.py, which detects 18 parameter keywords and records ‘TRUE’ or ‘FALSE’ depending on their existence in each APK file. If the parameter exists in the APK file, the value is ‘TRUE’ and if not, the value is ‘FALSE’. As a result, we have an output file which includes 92 lines of 18 parameters. Using Microsoft Excel we easily convert this text format to CSV format. In Figure 8 the first line shows the 18 parameter names, comma separated. In this data file, from the second line on, each line shows the values extracted from one APK file. We conduct two data mining classifications: one is a Naïve Bayesian classification and the other is a J48 classification.
Both classifications use the same size dataset, which is split 66:34 into a training set and a test set. A training set is used to determine the suitable threshold values used by Weka, and a test set is used to evaluate the behavior of DDL and TOPS based on DroidDream parameters. DroidDreamLight is written by the same person who created DroidDream. It appeared in May 2011, two months after DroidDream appeared. If we can detect a similarity of DroidDreamLight to DroidDream among other applications, the proposed technique could be used not only for identifying the similarity between two malware families, but also for detecting unknown malware whose behavior is similar to well-known malware before we install it. 5.2 Naïve Bayesian Classification The dataset is randomly split 66:34 into a training set and a test set. The results show that the correctly classified instances are 90.3%. Table 6 visualizes the successful performance of this experiment in a confusion matrix. Each column of the matrix represents one of two actual types, DDL and TOPS, while each row represents predicted instances such as DroidDream behaviors and Non-DroidDream behaviors. To classify each attribute value, its probabilities for each decision value should be considered. The Naïve Bayesian model shows that the relative frequency of class DDL, P(DDL), is 0.5 and the relative frequency of class TOPS, P(TOPS), is 0.5. These class frequencies are multiplied with the attributes’ probabilities for classification. Table 6. Confusion Matrix from Naïve Bayesian classification Actual Positive (DDL) (True Positive) 12 (False Negative) 3 Actual Negative (TOPS) (False Positive) 0 (True Negative) 16 Predicted Positive (DroidDream) Predicted Negative (Non-DroidDream) In Table 7, the 17 DroidDream parameters are listed with their probabilities. For example, restoreWifiState doesn’t exist in DDL or TOPS, and CHANGE_WIFI_STATE is absent from 94% of DDL and 84% of TOPS. getIMEI exists in 70% of DDL but only in 4% of TOPS.
From the result, we can find an interesting fact that the shaded parameters such as getIMEI, isPackageInstalled, getRawResource, getIMSI, and ACCESS_WIFI_STATE act oppositely between DDL and TOPS. In other words getIMEI, isPackageInstalled, getRawResource, and getIMSI parameters are mostly seen in DDL but not in TOPS. However, ACCESS_WIFI_STATE is not seen much in DDL, but it is seen very frequently in TOPS. getIMEI and getIMSI is a function to collect a phone’s IMEI and IMSI information. These security sensitive information will send to a remote server such as C&C server. getRawResource is also a function to collect raw data, which could be a product id, partner, model, SDK value, language, country, user id, and etc. isPackageInstalled is a function to check which package are installed or not installed in infected device. This installed package information is useful for the C&C server which 35 manage to install an additional package onto the infected device. These results indicate that DDL’s behavior is similar to DroidDream’s behavior. DDL shows high probability Table 7. 
Output of Naïve Bayesian Classification Parameters restoreWifiState removeExploit CHANGE_WIFI_STATE postUrl getIMEI installSu dopermroot isPackageInstalled changeWifiState getRawResource getIMSI onReceive AlarmReceiver root.Setting INTERNET READ_PHONE_STATE ACCESS_WIFI_STATE DDL TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TOPS DDL x 0.5 1 1 1 0.06 0.94 0.02 0.98 0.7 0.3 1 0.16 0.84 0.04 0.96 0.04 0.96 1 1 1 0.69 0.31 1 0.08 0.92 1 0.69 0.31 0.7 0.3 0.98 0.2 0.02 0.98 1 0.02 0.98 0.04 0.96 0.96 0.04 0.31 0.69 1 1 1 1 0.98 0.02 0.08 0.92 0.69 0.31 0.73 0.27 36 0.5 0 0.5 0.03 0.47 0.01 0.49 0.35 0.15 0 0.5 0 0.5 0.345 0.155 0 0.5 0.345 0.155 0.35 0.15 0.49 0.1 0.01 0.49 0 0.5 0.5 0 0.49 0.01 0.04 0.46 TOPS x 0.5 0.5 0 0.5 0.08 0.42 0.02 0.48 0.02 0.48 0 0.5 0 0.5 0.04 0.46 0 0.5 0.01 0.49 0.02 0.48 0.48 0.02 0.155 0.345 0 0.5 0.5 0 0.345 0.155 0.365 0.135 of existence on same function name as DroidDream, however TOPS shows higher probability of existence only READ_PHONE_STATE and ACCESS_WIFI_STATE. These two permissions are very common for not only malware but also non-malware. 5.3 J48 Classification The J48 is an open source Java implementation of the C4.5 algorithm in the Weka data mining tool. C4.5 builds decision trees from training data sets, and it choose a node that most effectively splits its set of samples into subsets enriched in one class or the other. We conduct two experiments with a different dataset in J48 classification. At the first J48 classification we choose only five parameters from DroidDream parameters: getIMEI, getIMSI, isPackageInstalled, getRawResource, and ACCESS_WIFI_STATE. These parameters are analyzed as most efficient DroidDream parameters to distinguish DDL and TOPS from previous Naïve Bayesian classification. Only this five parameters are used in the first J48 classification. 
In the second J48 classification we used all 17 DroidDream parameters, the same as in the previous Naïve Bayesian classification. Through these classifications we can compare the performance of the different sets of parameters we choose, and we can also extract new patterns of DDL and TOPS. The dataset is randomly split 66:34 into a training set and a test set. 5.3.1 Experiment 1: Result from J48 classification with 5 parameters The results show that the correctly classified instances are 87.1%. Table 8 visualizes the successful performance of this experiment in a confusion matrix. Each column of the matrix represents one of two actual types, DDL and TOPS, while each row represents predicted instances such as DroidDream behaviors and Non-DroidDream behaviors. Table 8 shows that most instances are on the diagonal of the table and only minor errors are represented by values outside the diagonal. Table 8. Confusion Matrix from J48 classification with 5 parameters Actual Positive (DDL) (True Positive) 15 (False Negative) 0 Actual Negative (TOPS) (False Positive) 4 (True Negative) 12 Predicted Positive (DroidDream) Predicted Negative (Non-DroidDream) The metrics used are: True Positive : Number of DDL classified as DroidDream True Negative : Number of TOPS classified as Non-DroidDream False Positive : Number of TOPS classified as DroidDream False Negative : Number of DDL classified as Non-DroidDream TP rate = TP/(TP+FN) = 15/(15+0) = 1 FP rate = FP/(TN+FP) = 4/(12+4) = 0.25 Accuracy = (TP+TN)/(TP+TN+FP+FN) = (15+12)/(15+12+4+0) = 0.87 (87%) Figure 9. Result of J48 decision tree with 5 parameters Decision tree induction is closely related to rule induction. Each path that starts from the root and ends at a leaf represents a rule. The decision tree in Figure 9 has getRawResource as the root, ACCESS_WIFI_STATE as an internal node, and DDL and TOPS as leaf nodes. The number of leaves is 3 and the size of the tree is 5. Most DDL samples have getRawResource in the APK file.
If not, ACCESS_WIFI_STATE is also not in the APK file. However, TOPS samples usually don’t have getRawResource in the APK file, but ACCESS_WIFI_STATE is in their APK files. 5.3.2 Experiment 2: Result from J48 classification with 17 parameters The results show that the correctly classified instances are 100%. Table 9 visualizes the successful performance of this experiment in a confusion matrix. Each column of the matrix represents one of two actual types, DDL and TOPS, while each row represents predicted instances such as DroidDream behaviors and Non-DroidDream behaviors. Table 9 shows that all instances are on the diagonal of the table, with no errors outside the diagonal. Table 9. Confusion Matrix from J48 classification with 17 parameters Actual Positive (DDL) (True Positive) 15 (False Negative) 0 Actual Negative (TOPS) (False Positive) 0 (True Negative) 16 Predicted Positive (DroidDream) Predicted Negative (Non-DroidDream) The metrics used are: True Positive : Number of DDL classified as DroidDream True Negative : Number of TOPS classified as Non-DroidDream False Positive : Number of TOPS classified as DroidDream False Negative : Number of DDL classified as Non-DroidDream TP rate = TP/(TP+FN) = 15/(15+0) = 1 FP rate = FP/(TN+FP) = 0/(16+0) = 0 Accuracy = (TP+TN)/(TP+TN+FP+FN) = (15+16)/(15+16+0+0) = 1 (100%) Figure 10. Result of J48 decision tree with 17 parameters Decision tree induction is closely related to rule induction. Each path that starts from the root of a decision tree and ends at one of its leaves represents a rule. In Figure 10 the decision tree includes getRawResource as the root; ACCESS_WIFI_STATE, READ_PHONE_STATE, and AlarmReceiver as internal nodes; and DDL and TOPS as the classified leaf nodes. The number of leaves is 5 and the size of the tree is 9. Compared to experiment 1, experiment 2 shows a significantly improved result because of the 17 parameters in the dataset.
These increasing parameters also make to build the decision tree more informatively when classify a class. In other words, the decision tree of experiment1 is too simplified, and this is a subset of the one of experiment 2. In this paper we only use two different size of attributes for J48 classification, but if we continue to classify with variance in number of attributes such as 10 or 15, we may find a suitable size of attributes for the classification. 40 CHAPTER VI CONCLUSION During the past 5 years the more Android malware application increase, the more Android malware detection technique and tools are developed. The more Android malware detecting technique developed, the more malware writer created a new technique to hide their malicious code and behavior. There is not yet an absolute solution that can detect and cure for all malware applications, but considerable progress has been made. By knowing how a general Android application behave and how the Android malware application behave and affect Android devices, this Android malware study will advanced in the development of better detection technique for Android malware application. This paper has attempted to sketch out the framework of Android malware and propose a method to detect a similar malware behavior through analyzing APK files and conducting a data mining. It has been written with three purposes in mind: to introduce general Android malware methods and research, to propose a new method to extract important behavior information from APK files using Android reverse engineering tool, and to use these observations to verify a usability of the proposed method through data mining technique. 41 More specifically, the findings from the experiments confirm that our method is useful to detect a similar behavior among different Android malware families, especially DroidDream and DroidDreamLight. 
By Naïve Bayesian classification we find similar behaviors such as getIMEI, isPackageInstalled, getRawResource, and getIMSI between the two malware families, compared to benign applications. By J48 decision tree classification we can find rules to classify two groups: malware (DDL) and benign-ware (TOPS). In this research we only use two malware families, but we can extend the dataset to a large number of malware families as needed. If we find out the specific behaviors of each malware family by analyzing APK files without running them, we can save a great deal of time and effort in analyzing each APK file. Furthermore, we may use the characteristic behavior information to detect a new malware application which has a similar behavior to previous malware. In the future there is a possibility of having not only Android Trojans but also Android worms and Android rootkits, because this is similar to the trend of traditional non-mobile malware. To evade traditional detection methods, polymorphic and metamorphic Android malware are another possibility. A number of issues remain; however, it is to be hoped that this paper will yield general insights into Android malware research. 42 REFERENCES [1] Tech crunch, Android Now Seeing 550,000 Activations Per Day, http://techcrunch.com/2011/07/14/android-now-seeing-550000-activations-per-day/ , July 2011 [2] Gartner Corporation. Gartner Says Worldwide PC, Tablet and Mobile Phone Combined Shipments to Reach 2.4 Billion Units in 2013, http://www.gartner.com/newsroom/id/2408515, April 2013 [3] Gartner Corporation. Gartner Says Annual Smartphone Sales Surpassed Sales of Feature Phones for the First Time in 2013, http://www.gartner.com/newsroom/id/2665715, Feb. 2013 [4] Juniper Networks, Juniper Networks Finds Mobile Threats Continue Rampant Growth as Attackers Become More Entrepreneurial, http://newsroom.juniper.net/pressreleases/juniper-networks-finds-mobile-threats-continue-ram-nyse-jnpr-1029552, June 2013 [5] C. A.
Castillo, "Android Malware Past, Present, and Future," White Paper of McAfee Mobile Security Working Group, 2011. [6] Iland D, Pucher A, “Detecting Android Malware on Network Level,” University of California, Santa Barbara, December 2011 [7] Isohara T., Takemori K., Kubota,A., “Kernel-based behavior analysis for Android malware detection. 2011 Seventh International Conference on Computational Intelligence and Security, 3-4 December 2011, Sanya, Hainan Province, China. [8] W.Enck, M.Ongtang, “On lightweight mobile phone application certification,” in Proc. of ACM Conference on Computer and Communication Security, 2009, pp.235-245 [9] Wu D, Mao, “DroidMat:Android malware detection through manifest and API calls tracing. 2012 Seventh Asia Joint Conference on Information Security, 9-10 August 2012, Tokyo, Japan. [10] A.P.Felt, E.Chin, “Android permissions demystified,” in Proc. of ACM Conference on Computer and Communications Security, 2011, pp.627-638 43 [11] M.Grace, Y.Zhou, “Riskranker:scalable and accurate zero-day android malware detection,” in Proc. of International Conference on Mobile Systems, Applications, and Services, 2012, pp.281-294 [12] W.Enck, P.Gilbert, “Taintdroid:An information flow tracking system for realtime privacy monitoring on smartphones,” in Proc. of USENIX Smposium on Operating Systems Design and Implementation, 2010, pp 393-407 [13] Y.Zhou, Z.Wang, “Hey, you, get off of my market:Detecting malicious apps in official and alternative android markets,” in Proc. of Network and Distributed System Security Symposium, 2012 [14] L.K. Yan, H. Yin, “DroidScope: seamlessly reconstructing the OS and Dalvik semantic analysis.” Proceedings of the 21th USENIX Security, Symposium, 2012. [15] J. Zico Kolter, Marcus A. 
Maloof, “Learning to Detect and Classify Malicious Executables in the Wild,” In Proceedings of the International Conference on Knowledge Discovery and Data Mining, 2006 [16] Matthew G.Schultz, Eleazar E,“Data mining methods for detection of new malicious executables”, In proc. of the 2001 IEEE Symposium on Security Privacy, p38, 2001 [17] A.Walenstein, R. Mathur, “Normalizing metamorphic malware using term rewriting,” In Source Code Analysis and Manipulation, pp 75-84, 2006 [18] A.Walenstein, R. Mathur, “Normalizing metamorphic malware using term rewriting,” In Source Code Analysis and Manipulation, pp 75-84, 2006 [19] Undx, http://sourceforge.net/projects/undx/ , April, 2013 [20] DroidDream, http://www.webopedia.com/TERM/D/droiddream.html , [21] Lookout, “Do Androids Dream….?” https://blog.lookout.com/blog/2011/03/06/doandroids-dream%E2%80%A6/, March 2011 [22] International Mobile Station Equipment Identity, http://en.wikipedia.org/wiki/Imei [23] International Mobile Subscriber Identity (IMSI), http://en.wikipedia.org/wiki/IMSI or http://www.tutorialspoint.com/gsm/gsm_addressing.htm [24] Lookout, “Update: Security Alert: DroidDreamLight, New Malware from the Developers of DroidDream,” https://blog.lookout.com/blog/2011/05/30/security-alertdroiddreamlight-new-malware-from-the-developers-of-droiddream/ , May 2011 44 [25] Android, App Manifest, http://developer.android.com/guide/topics/manifest/manifest-intro.html [26] Android Malware Genome Project.
http://www.malgenomeproject.org/, [27] Nakedsecurity, Aftermath of the Droid Dream Android Market malware attack , http://nakedsecurity.sophos.com/2011/03/03/droid-dream-android-market-malwareattack-aftermath/, March, 2011 [28] Google Play, https://play.google.com/store, September, 2014 [29] Apk-downloadder, http://apps.evozi.com/apk-downloadder, September, 2014 [30] Weka, http://www.cs.waikato.ac.nz/ml/weka/, September, 2014 45 APPENDICES 46 APPENDIX A TOPS LIST These are the 46 TOPS application names and APK file names from the Google Play store on September 18th, 2014. Rank TOPS Application Name APK filename 1 Facebook Messenger com.facebook.orca.apk 2 Facebook com.facebook.katana.apk 3 Pandora Internet Radio com.pandora.android.apk 4 Instagram com.instagram.android.apk 5 Super-Bright LED Flashlight com.surpax.ledflashlight.panel.apk 6 Spider-Man Unlimited com.gameloft.android.ANMP.GloftSIHM.apk 7 Snapchat com.snapchat.android.apk 8 Netflix com.netflix.mediaclient.apk 9 Candy Crush Saga com.king.candycrushsaga.apk 10 WhatsApp Messenger com.whatsapp.apk 11 Skype com.skype.raider.apk 12 Kik kik.android.apk 13 Twitter com.twitter.android.apk 14 Clash of Clans com.supercell.clashofclans.apk 47 15 Spotify Music com.spotify.music.apk 16 Zedge Ringtones Wallpapers net.zedge.android.apk 17 Wipeout 2 com.activision.wipeout.apk 18 Diamond Digger Saga com.midasplayer.apps.diamonddiggersaga.apk 19 ebay com.ebay.mobile.apk 20 Tango Messenger, Video & Calls com.sgiggle.production.apk 21 Racing Rivals com.ciegames.RacingRivals.apk 22 Pinterest com.pinterest.apk 23 Walmart com.walmart.android.apk 24 Jump Jump Ninja com.ninjajumprun.android.apk 25 Yahoo com.yahoo.mobile.client.android.mail.apk 26 Hidden object candy world air.com.differencegames.hocandycrunchfree.apk 27 Bee Bubble Shooter com.xbubble.bubbleisland2.apk 28 Solitaire com.spacegame.solitaire.apk 29 iHeartRadio com.clearchannel.iheardradio.controller.apk 30 Age of Warring Empire com.stac.empire.main.apk 31 Flutter 
com.mobage.ww.a1675.FlutterMobile_Android.apk 32 Empire: Rome Rising com.feelingtouch.dipan.slaggameglobal.apk 33 Shazam com.shazam.android.apk 34 Township com.playrix.township.apk 35 SoundCloud com.soundcloud.android.apk 36 Don't Tap the White Tile com.umonistudio.tile.apk 37 Game of War - Fire Age com.machinezone.gow.apk 48 38 Emoji Keyboard com.qisiemoji.inputmethod.apk 39 The Weather Channel com.weather.Weather.apk 40 NFL Mobile com.gotv.nflgamecenter.us.lite.apk 41 Bubble Witch 2 Saga com.midasplayer.apps.bubblewitchsaga2.apk 42 Temple Run 2 com.imangl.templerun2.apk 43 Yummy Mania com.spacegame.dessert.apk 44 Photo Grid - Collage Maker com.roidapp.photogrid.apk 45 Slots Big Win Casino com.fivestargames.slots.apk 46 Adobe Reader com.adobe.reader.apk 49 APPENDIX B INSTRUCTION OF ANALYZING DROIDDREAM Linux 12.04 LTS / Python 2.7.x GENERATEOUTPUT.sh, APKCOUNTER.py, GLOBAL.py 1. All DroidDream APK files are in the DroidDream directory 2. Download Androguard from google, then you will have a directory named androguard 3. Put DroidDream and GENERATEOUTPUT.sh into the androguard directory 4. In the androguard directory, mkdir DroidDreamOut 5. Run GENERATEOUTPUT.sh $./GENERATEOUTPUT.sh DroidDream 6. The outputs are all in the DroidDreamOut directory. 7. Put DroidDreamOut, APKCOUNTER.py, GLOBAL.py into a new directory 8. You may need to install the json module for APKCOUNTER.py 9. Run APKCOUNTER.py $python APKCOUNTER.py DroidDreamOut 10. It will make count_permission, count_activity, count_service, count_receiver, count_functions, count_others, and count_all files. The count_all file has all the contents of the other files. These files show each key-parameter and its count number. 50 APPENDIX C INSTRUCTION OF ANALYZING DDL AND TOPS Linux 12.04 LTS / Python 2.7.x , COUNTPARAMETER.py, MS EXCEL, WEKA 1. DroidDreamLight’s APKs are in DroidDreamLight/ and TOPS’s APKs are in TOPS/ 2. Put DroidDreamLight/, TOPS/, COUNTPARAMETER.py into the same directory 3.
Run $ ./COUNTPARAMETER.py DroidDreamLight 4. The output will be outputParameter.txt, which contains a list of Boolean values (TRUE/FALSE) for each APK file. If a parameter is included in the APK, it returns ‘TRUE’. 5. Copy outputParameter.txt to a Windows PC and open it in Excel. In the Excel file, the first line should be the parameter names. You can move the parameter names to this line, and remove unnecessary parameter columns and unnecessary characters such as “{”, “}”, “:”. Then save it in CSV file format as DDLdata.csv. Figure 10 shows an example of the format. 6. Follow steps 3 to 5 for TOPS. Run $ ./COUNTPARAMETER.py TOPS. The output will be the ‘outputParameter.txt’ file again, so it is better to rename the previous outputParameter.txt to another name such as ‘DDL_outputParameter.txt’ if you want to keep DDL’s output. Save the CSV file as TOPSdata.csv. 51 7. Open DDLdata.csv and copy the contents of TOPSdata.csv into it. The first line should be the parameter names. From the second line on, it will be all data with Boolean values. If you have another parameter title line from TOPSdata.csv, remove that line. Insert a new column on the left, put the decision parameter ‘APPS’ at the top of that column, and put DDL/TOPS in each following cell depending on where the row came from. If the data is from DDLdata, put ‘DDL’; if the data is from TOPS, put ‘TOPS’. Then save this file as Alldata.csv. 8. Load Weka and start data mining with Alldata.csv as the dataset. 
# ======================================================================
# APPENDIX D  GENERATEOUTPUT.SH
# ======================================================================
# Bash listing, carried as comments so the Python listings below stay
# syntactically valid.  The page break in the extracted text had split the
# androapkinfo command; its "-i <apk> > <report>" tail is rejoined here.
# NOTE(review): rejoined order inferred from the fragments and from
# Appendix B (reports land in <directory>Out/) -- confirm against the PDF.
#
#   #!/bin/bash
#   if [ "$1" == "" ]
#   then
#       echo "command: ./GENERATEOUTPUT.sh [directory]"
#       exit
#   fi
#   CurrentDIR=$1 #DroidDream
#   FILES=`ls -l --time-style="long-iso" $CurrentDIR | egrep '^-' | awk '{print $8}'`
#   for FILE in $FILES
#   do
#       echo "--------" ${FILE} "Start--------"
#       python ../androguard/androapkinfo.py -i ${CurrentDIR}/${FILE} > ${CurrentDIR}Out/${FILE}.out
#   done

# ======================================================================
# APPENDIX E  APKCOUNT.PY
# ======================================================================
#!/usr/bin/python
import sys, getopt
import GLOBAL
import os
import json

# Occurrence counters, accumulated over every report CheckString reads.
dic_per = {}            # first token of each line in the PERMISSIONS section
dic_act = {}            # first token of each line in the ACTIVITIES section
dic_intent_act = {}     # '.action.' tokens found on ACTIVITIES lines
dic_intent_cat = {}     # '.category.' tokens found on ACTIVITIES lines
dic_ser = {}            # first token of each line in the SERVICES section
dic_rec = {}            # first token of each line in the RECEIVERS section
dic_rintent_act = {}    # '.action.' tokens found on RECEIVERS lines
dic_rintent_cat = {}    # '.category.' tokens found on RECEIVERS lines
dic_func = {}           # second token of each line from the first 'Lcom' line on
dic_nativecode = {'True': 0, 'False': 0}
dic_dynamiccode = {'True': 0, 'False': 0}
dic_reflectcode = {'True': 0, 'False': 0}
dic_obfuscatcode = {'True': 0, 'False': 0}


def _bump(counter, key):
    """Add one to counter[key], creating the entry on first sight."""
    counter[key] = counter.get(key, 0) + 1


def _bump_flag(counter, tokens):
    """Count a '<Name> code: True|False' line under 'True' or 'False'.

    Only a third token equal to 'True' counts as 'True'; anything else,
    including a missing third token, counts as 'False'.
    """
    if len(tokens) > 2 and tokens[2] == 'True':
        counter['True'] += 1
    else:
        counter['False'] += 1


def _count_intents(tokens, action_counter, category_counter):
    """Tally tokens containing '.action.' or '.category.' (action wins)."""
    for token in tokens:
        if '.action.' in token:
            _bump(action_counter, token)
        elif '.category.' in token:
            _bump(category_counter, token)


def CheckString(Directory, filename):
    """Tally one report file into the module-level counters.

    Directory -- directory that holds the report
    filename  -- name of the report file inside Directory

    The file is read line by line.  Lines starting with 'PERMISSIONS:',
    'MAIN ACTIVITY:', 'ACTIVITIES:', 'SERVICES:', 'RECEIVERS:' and
    'PROVIDERS:' switch the current section; the four '... code:' /
    'Ascii Obfuscation:' lines update the True/False counters; the first
    line starting with 'Lcom' switches function counting on for the rest
    of the file.  Returns None.
    """
    in_permissions = False
    in_activities = False
    in_services = False
    in_receivers = False
    in_functions = False

    # 'with' closes the report even if a line fails to parse (the listing
    # never closed it).
    with open(os.path.join(Directory, filename), 'r') as inFile:
        for line in inFile:
            tokens = line.split()
            if not tokens:
                # A blank line used to raise IndexError on pl[0] whenever a
                # section was active.
                continue

            # Reading line and decide parameter
            if line.startswith('PERMISSIONS:'):
                in_permissions = True
            elif line.startswith('MAIN ACTIVITY:'):
                in_permissions = False
            elif line.startswith('ACTIVITIES:'):
                in_activities = True
            elif line.startswith('SERVICES:'):
                in_activities = False
                in_services = True
            elif line.startswith('RECEIVERS:'):
                in_services = False
                in_receivers = True
            elif line.startswith('PROVIDERS:'):
                in_receivers = False
            elif line.startswith('Native code:'):
                _bump_flag(dic_nativecode, tokens)
            elif line.startswith('Dynamic code:'):
                _bump_flag(dic_dynamiccode, tokens)
            elif line.startswith('Reflection code:'):
                _bump_flag(dic_reflectcode, tokens)
            elif line.startswith('Ascii Obfuscation:'):
                _bump_flag(dic_obfuscatcode, tokens)
            elif line.startswith('Lcom'):
                in_functions = True

            # Find parameters
            # NOTE(review): this runs on the same line that switched the
            # section, so a header such as 'PERMISSIONS:' is tallied as an
            # entry too.  Kept as in the listing; the listing's indentation
            # was lost in extraction, so confirm this placement.
            if in_permissions:
                _bump(dic_per, tokens[0])
            elif in_activities:
                _bump(dic_act, tokens[0])
                _count_intents(tokens[1:], dic_intent_act, dic_intent_cat)
            elif in_services:
                _bump(dic_ser, tokens[0])
            elif in_receivers:
                _bump(dic_rec, tokens[0])
                _count_intents(tokens[1:], dic_rintent_act, dic_rintent_cat)
            elif in_functions:
                # One-token lines have no second field to count (the
                # listing raised IndexError on them).
                if len(tokens) > 1:
                    _bump(dic_func, tokens[1])


def _write_json_counts(path, counters):
    """Write each counter to path as sorted, indented JSON, one after another."""
    with open(path, 'w') as f:
        for counter in counters:
            f.write(json.dumps(counter, indent=1, sort_keys=True) + "\n")


def Begins(Directory):
    """Tally every '*.out' report in Directory and write the count_* files.

    Writes count_permission, count_activity, count_service, count_receiver,
    count_functions and count_others into the current working directory,
    then concatenates them, each under its file name and a dashed rule,
    into count_all.
    """
    for filename in os.listdir(Directory):
        if filename.endswith(".out"):
            CheckString(Directory, filename)

    # f.write() replaces the Python-2-only 'print >> f' form; the bytes
    # written are the same.
    _write_json_counts("count_permission", [dic_per])
    _write_json_counts("count_activity",
                       [dic_act, dic_intent_act, dic_intent_cat])
    _write_json_counts("count_service", [dic_ser])
    _write_json_counts("count_receiver",
                       [dic_rec, dic_rintent_act, dic_rintent_cat])
    _write_json_counts("count_functions", [dic_func])

    # Others
    with open("count_others", "w") as f:
        # "%s %s" keeps the separating space that 'print a, b' inserted
        # after the label's own trailing space.
        f.write("%s %s\n" % ("Native code = ", dic_nativecode))
        f.write("%s %s\n" % ("Dynamic code = ", dic_dynamiccode))
        f.write("%s %s\n" % ("Reflection code = ", dic_reflectcode))
        f.write("%s %s\n" % ("Ascii Obfuscate code = ", dic_obfuscatcode))

    filenames = ['count_permission', 'count_activity', 'count_service',
                 'count_receiver', 'count_functions', 'count_others']
    with open("count_all", 'w') as outfile:
        for fname in filenames:
            with open(fname) as infile:
                outfile.write(fname + "\n")
                outfile.write("-------------------------\n")
                outfile.write(infile.read())


def Main():
    """Entry point: expects the report directory as the first argument.

    Prints a usage line to stderr and exits with status 2 when the argument
    is missing.  (The listing caught getopt.GetoptError, which nothing
    raises, and called an undefined usage().)
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: python %s <directory>\n" % sys.argv[0])
        sys.exit(2)
    # Get input/output files information from batch file
    Begins(sys.argv[1])


if __name__ == '__main__':
    Main()

# ======================================================================
# APPENDIX F  COUNTPARAMETER.PY
# ======================================================================
# Separate script: its CheckString/Begins/Main are independent of the
# Appendix E functions of the same names.
#!/usr/bin/python
#python COUNTPARAMETER.py DroidDreamOut
import sys, getopt
import GLOBAL
import os

# Strings searched for in each report.  Kept in a fixed order so that every
# run builds its result rows the same way (the listing iterated a set).
_PARAMETER_NAMES = (
    'ACCESS_WIFI_STATE', 'CHANGE_WIFI_STATE', 'INTERNET',
    'READ_PHONE_STATE', 'AlarmReceiver', 'root.Setting', 'changeWifiState',
    'dopermroot', 'getIMEI', 'getIMSI', 'getRawResource', 'installSu',
    'isPackageInstalled', 'onReceive', 'postUrl', 'removeExploit',
    'restoreWifiState',
)


def CheckString(Directory, filename, i):
    """Report which parameter names occur anywhere in one report file.

    Directory -- directory that holds the report
    filename  -- name of the report file inside Directory
    i         -- running index supplied by Begins; not used here

    Returns a dict mapping every parameter name to 'TRUE' when the name
    appears as a substring of the file's text, otherwise 'FALSE'.
    """
    # Read inside 'with' so the handle is closed (the listing leaked it).
    with open(os.path.join(Directory, filename), 'r') as inFile:
        readfile = inFile.read()

    results = {}
    for p in _PARAMETER_NAMES:
        results[p] = 'TRUE' if p in readfile else 'FALSE'
    return results


def Begins(Directory):
    """Write one result row per '*.out' report to outputParameter.txt.

    The output file is created (or truncated) in the current working
    directory; each row is the printed form of the dict returned by
    CheckString.  Reports are visited in sorted name order so that rows
    come out in a reproducible order.
    """
    i = 0
    # Opened once for the whole run instead of being reopened in append
    # mode for every report.
    with open("outputParameter.txt", 'w') as f:
        for filename in sorted(os.listdir(Directory)):
            if filename.endswith(".out"):
                results = CheckString(Directory, filename, i)
                i = i + 1
                f.write("%s\n" % (results,))


def Main():
    """Entry point: expects the report directory as the first argument.

    Prints a usage line to stderr and exits with status 2 when the argument
    is missing.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: python %s <directory>\n" % sys.argv[0])
        sys.exit(2)
    # Get input/output files information from batch file
    Begins(sys.argv[1])


if __name__ == '__main__':
    Main()