[ENG] CVE-2021-39863 Analysis

September 1, 2024


An out-of-bounds read/write and heap buffer overflow vulnerability occurs when Adobe Acrobat Reader DC concatenates two URLs that use different encodings.

  • Affected Products
    • We used 32-bit Adobe Acrobat Reader DC 21.005.20048.43252 for the exploit.
    • You can download this version of Adobe Acrobat Reader DC at this link.


1. Find the Vulnerable Function

  • This vulnerability occurs during URL concatenation, so we debugged IA32.api in Adobe Acrobat Reader DC.

  • The referenced exploit blogs do not explain how to locate the vulnerable function, so we located it in three different ways.

  • Three ways to find the vulnerable function

    1. Cross-Reference Check

      • According to the Exodus blog, a function called from the vulnerable function calls strncat to concatenate the URLs.

      • IA32.api does not implement strncat itself; it imports the function from another module.

        • The names of functions imported from other modules are stored in the .rdata segment and are used when those functions are called.
      • As shown in [Fig 1] and [Fig 2], we found the vulnerable function by following the cross-references from the string "strncat" stored in the .rdata segment.
        [Fig 1] Find the string "strncat" and the function that cross-references it.

      [Fig 2] Check the function that cross-references the function found in [Fig 1]

    2. Search by Opcode

      • The vulnerable function checks for the UTF-16 BOM to detect UTF-16BE encoding.
        : For example, it compares a 1-byte value with \xFF.
      • We searched for the opcode that compares a 1-byte register with \xFF and used the matches to locate the vulnerable function.

      [Fig 3] Find the vulnerable function by Searching OpCode

    3. Use an IDA IDC Script

      • We found out from the Exodus blog that the vulnerable function has many instructions.

      • Based on this, we thought we could find the vulnerable function easily by counting the number of instructions in each function.

      • We found the vulnerable function by writing a script in IDC, the built-in scripting language of IDA, that lists the functions containing more than a specific number of instructions.

        // IDC script that reports every function containing more than a given
        // number of instructions (the threshold is hard-coded as 400 below).
        #include <idc.idc>
        static main() {
        	auto func, end, count, inst;
        	func = 0;
        	Message("================ START =================\n");
        	// Visit each function in turn, starting the search from address 0.
        	for(func = NextFunction(func); func != BADADDR; func = NextFunction(func)) {
        		if(func != -1) {
        			// The function's end address bounds the instruction walk below.
        			end = GetFunctionAttr(func, FUNCATTR_END);
        			count = 0;
        			inst = func;
        			// Advance from one code item to the next until the end address is reached.
        			// NOTE(review): no statement that increments count is visible in this
        			// listing, and the closing braces are missing - confirm against the
        			// original script before running it.
        			while(inst < end) {
        				inst = FindCode(inst, SEARCH_DOWN | SEARCH_NEXT);
        			// Print the function name when its count exceeds the threshold.
        			if(count > 400) {
        				Message("%s contains %d instructions\n" , Name(func), count);
        		} else {
        			//Message("No function found at location %x", func);

      [Fig 4] Find the vulnerable function with IDC IDA Script

2. Analysis of the Vulnerable Function

Decompiled Code for the Vulnerable Function in IA32.api
// Decompiler output for the IA32.api routine that combines a base URL with a
// relative URL (named ExploitPoint by the authors of this analysis).
//   Source   - base URL; tested for a leading 0xFE 0xFF byte pair
//   lpString - relative URL; tested for the same leading byte pair
//   String   - passed to sub_25803194 together with the selected parse result
//              (presumably the output buffer - confirm)
//   a4       - in/out size: compared against the required size and overwritten
//   a5       - receives a status code: reset to 0 on entry, then -2, -3, -7 or
//              the URLparse_process result on failure; a NULL a5 returns 0 at once
// Returns v5 (initialised to 1, cleared on one allocation failure) or 0 on the
// early-exit paths.
// NOTE(review): the closing braces and every LABEL_xx jump target are missing
// from this listing, so block nesting can only be inferred from indentation.
__int16 __cdecl ExploitPoint(wchar_t *Source, CHAR *lpString, char *String, _DWORD *a4, int *a5)
	__int16 v5; // di
	wchar_t *concatURL_addr; // ebx
	CHAR *lpString_copy; // eax
	CHAR v8; // dl
	__int64 len_lpString; // rax
	wchar_t *Source_copy; // ecx
	__int64 len_Source; // rax
	int v12; // eax
	int totallen_Source; // eax
	int len_Source_notUTF; // eax
	CHAR *allocadr_Source; // eax
	wchar_t *v16; // ecx
	int totallen_lpString; // eax
	int len_lpString_notUTF; // eax
	CHAR *allocadr_lpString; // eax
	int v20; // eax
	int v21; // edx
	int v22; // edx
	_DWORD *v23; // eax
	int v24; // ecx
	int *v25; // eax
	int v26; // ecx
	int v27; // eax
	int v28; // ecx
	int v29; // eax
	wchar_t *v30; // ecx
	int v31; // eax
	int len_allocaddr; // eax
	int v33; // eax
	int v34; // ecx
	int v35; // edx
	wchar_t *v37; // [esp-4h] [ebp-F4h]
    unsigned int v38; // [esp-4h] [ebp-F4h]
    wchar_t *v39; // [esp-4h] [ebp-F4h]
    unsigned int v40; // [esp-4h] [ebp-F4h]
    unsigned int v41; // [esp-4h] [ebp-F4h]
    // v42..v49, v50..v62 and v63..v74 occupy three contiguous stack ranges;
    // presumably three URL parse-result records (base, relative, combined) - confirm.
    int v42[7]; // [esp+Ch] [ebp-E4h] BYREF
    int v43; // [esp+28h] [ebp-C8h]
    int v44; // [esp+2Ch] [ebp-C4h]
    int v45; // [esp+30h] [ebp-C0h]
    int v46; // [esp+34h] [ebp-BCh]
    wchar_t *v47; // [esp+38h] [ebp-B8h]
    __int64 v48; // [esp+3Ch] [ebp-B4h]
    int v49; // [esp+4Ch] [ebp-A4h]
    int v50[3]; // [esp+50h] [ebp-A0h] BYREF
    int v51; // [esp+5Ch] [ebp-94h]
    int v52; // [esp+60h] [ebp-90h]
    int v53; // [esp+64h] [ebp-8Ch]
    int v54; // [esp+68h] [ebp-88h]
    int v55; // [esp+6Ch] [ebp-84h]
    int v56; // [esp+70h] [ebp-80h]
    int v57; // [esp+74h] [ebp-7Ch]
    int v58; // [esp+78h] [ebp-78h]
    char *v59; // [esp+7Ch] [ebp-74h]
    __int64 v60; // [esp+80h] [ebp-70h]
    __int64 v61; // [esp+88h] [ebp-68h]
    int v62; // [esp+90h] [ebp-60h]
    int v63[3]; // [esp+94h] [ebp-5Ch] BYREF
    int v64; // [esp+A0h] [ebp-50h]
    int v65; // [esp+A4h] [ebp-4Ch]
    int v66; // [esp+A8h] [ebp-48h]
    int v67; // [esp+ACh] [ebp-44h]
    int v68; // [esp+B0h] [ebp-40h]
    int v69; // [esp+B4h] [ebp-3Ch]
    int v70; // [esp+B8h] [ebp-38h]
    int v71; // [esp+BCh] [ebp-34h]
    void *v72; // [esp+C0h] [ebp-30h]
    __int128 v73; // [esp+C4h] [ebp-2Ch]
    int v74; // [esp+D4h] [ebp-1Ch]
    int iMaxLength[2]; // [esp+D8h] [ebp-18h]
    LPCSTR allocadr_lpString_copy; // [esp+E0h] [ebp-10h]
    LPCSTR allocadr_Source_copy; // [esp+E4h] [ebp-Ch]
    int v78[2]; // [esp+E8h] [ebp-8h] BYREF
    allocadr_Source_copy = 0;
    allocadr_lpString_copy = 0;
    v5 = 1;
    *(_QWORD *)v78 = 0i64;
    *(_QWORD *)iMaxLength = 0i64;
    concatURL_addr = 0;
    v49 = 0;
    v62 = 0;
    v74 = 0;
    // Without a status pointer nothing is done; otherwise the status starts at 0.
    if(!a5) return 0;
    *a5 = 0;
    // [1-1] get the length of relative URL
    lpString_copy = lpString;
    // strlen_UTF16BE is used only when lpString starts with the bytes 0xFE 0xFF.
    if(lpString && *lpString && (v8 = lpString[1]) != 0 && *lpString == (CHAR)0xFE && v8 == (CHAR)0xFF) {
		len_lpString = ((__int64 (__cdecl *)(CHAR *))strlen_UTF16BE)(lpString);
		v78[1] = len_lpString;
		// Both 32-bit halves all ones: the helper's 64-bit result was -1.
		if ((HIDWORD(len_lpString)&(unsigned int)len_lpString) == -1) {
			*a5 = -2;
            return 0;
        lpString_copy = lpString;
	} else {
		v78[1] = v78[0];
    // [1-2] get the length of base URL
    Source_copy = Source;
    if(!Source || !lpString_copy || !String || !a4) {
        *a5 = -2;
        goto LABEL_86;
    if(*(_BYTE *)Source != 0xFE) goto LABEL_25;
    if(*((_BYTE *)Source+1) == 0xFF) {
        len_Source = ((__int64 (__cdecl *)(wchar_t *))strlen_UTF16BE)(Source);
        iMaxLength[1] = len_Source;
        if((HIDWORD(len_Source)&(unsigned int)len_Source) == -1) goto LABEL_9;
        Source_copy = Source;
        v12 = iMaxLength[1];
	} else {
        v12 = iMaxLength[0];
	// Buffer size: measured length + 2 on the 0xFE 0xFF path, custom_strlen + 1 otherwise.
	if(*(_BYTE *)Source_copy == 0xFE && *((_BYTE *)Source_copy+1) == 0xFF) {
        totallen_Source = v12 + 2;
	} else {
        len_Source_notUTF = (int)custom_strlen((LPCSTR)Source_copy);
        Source_copy = v37;
        totallen_Source = len_Source_notUTF + 1;
    iMaxLength[1] = totallen_Source;
    // [2-1] store base URL to new heap
    allocadr_Source = (CHAR *)((int (__usercall *)@<eax>(wchar_t *@<e>, int, int))calloc_guess)(Source_copy, 1, totallen_Source);
    allocadr_Source_copy = allocadr_Source;
    if(!allocadr_Source) {
        *a5 = -7;
        return 0;
    ((void (__usercall *)(unsigned int@<ecx>, wchar_t *, wchar_t *, int))custom_strncpy)(v38, (wchar_t *)allocadr_Source, Source, iMaxLength[1]);
    // Same sizing rule applied to the relative URL.
    if(*lpString==(CHAR)0xFE && lpString[1]==(CHAR)0xFF) {
        totallen_lpString = v78[1] + 2;
    } else {
        len_lpString_notUTF = (int)custom_strlen(lpString);
        v16 = v39;
        totallen_lpString = len_lpString_notUTF + 1;
    v78[1] = totallen_lpString;
    // [2-2] store relative URL to new heap
    allocadr_lpString = (CHAR *)((int (__usercall *)@<eax>(wchar_t *@<ecx>, int, int))calloc_guess)(v16, 1, totallen_lpString);
    allocadr_lpString_copy = allocadr_lpString;
    if(!allocadr_lpString) {
        *a5 = -7;
		v5 = 0;
        goto LABEL_87;
    ((void (__usercall *)(unsigned int@<ecx>, wchar_t *, wchar_t *, int))custom_strncpy)(v40, (wchar_t *)allocadr_lpString, (wchar_t *)lpString, v78[1]);
    if(!(unsigned __int16)check_modify_URL((int)allocadr_Source_copy, iMaxLength[1], a5) || !(unsigned __int16)check_modify_URL((int)allocadr_lpString_copy, v78[1], a5)) {
        goto LABEL_86;
    // [3] Perform URL-related operations
    // Parse the base copy into v42 and the relative copy into v50; a non-zero
    // result from either call is stored in *a5.
    v20 = URLparse_process((CHAR *)allocadr_Source_copy, v42);
    if(v20 || (v20 = URLparse_process((CHAR *)allocadr_lpString_copy, v50)) != 0) {
        *a5 = v20;
        goto LABEL_86;
    if(!*(_BYTE *)Source || (v21 = v42[0], v50[0] != 5) && v50[0] != v42[0]) {
        v35 = sub_25802FAC((int)v50);
        v23 = a4;
        v24 = v35 + 1;
        if(v35 + 1 > *a4) goto LABEL_44;
        *a4 = v35;
        v25 = v50;
        goto LABEL_82;
    if(*lpString) {
        v26 = v55;
        v63[1] = v42[1];
        v63[2] = v42[2];
        v27 = v51;
        v63[0] = v42[0];
        v73 = 0i64;
        if(!v51 && !v53 && !v55) {
			if(sub_25803155(v50)) {
            	v28 = v44;
            	v64 = v42[3];
            	v65 = v42[4];
            	v66 = v42[5];
            	v67 = v42[6];
            	v29 = v43;
            	if(v49 == 1) {
                	v29 = v43 + 2;
                	v28 = v44 - 1;
                	v43 += 2;
            	v69 = v28;
            	v68 = v29;
            	v70 = v45;
            	if(v58) {
                	if (*v59 != '/') {
    	            	// [4] Allocate new heap to store connected URL
    	            	// Requested size is v58 + 1 + v46 bytes (presumably relative-part
    	            	// length + terminator + base-part length - confirm).
                		concatURL_addr = (wchar_t *)((int (__usercall *)@<eax>(wchar_t *@<ecx>, int, int))calloc_guess)((wchar_t *)(v58 + 1), 1, v58 + 1 + v46);
                		if(!concatURL_addr) {
                  			v23 = a4;
                  			v24 = v58 + v46 + 1;
                  			goto LABEL_44;
                		if(v46) {
    	            		// [5] Store base URL to heap memory which is allocated at [4]
                  			((void (__usercall *)(unsigned int@<ecx>, wchar_t *, wchar_t *, int))custom_strncpy)(v41, concatURL_addr, v47, v46 + 1);
                  			// If the copied part does not end in '/', cut it after the
                  			// position returned by sub_25818D6E, or empty it when none is found.
                  			if (*((_BYTE *)concatURL_addr + v46 - 1) != '/') {
                    			v31 = ((int (__usercall *)@<eax>(wchar_t *@<ecx>, char *, int))sub_25818D6E)(v30, (char *)concatURL_addr, '/');
                    			if(v31) *(_BYTE *)(v31 + 1) = 0;
                    			else *(_BYTE *)concatURL_addr = 0;
                		// [6] Concatenate relative URL after the base URL stored at [5] : OOB R/W and Heap BOF occurs
		                if(v58) {
				        	len_allocaddr = (int)custom_strlen((LPCSTR)concatURL_addr);
							// maxlength passed is v58 + 1 + the destination's current custom_strlen.
							((void (__usercall *)(uintptr_t@<ecx>, char *, char *, int))custom_strncat)(v58 + 1, (char *)concatURL_addr, v59, v58 + 1 + len_allocaddr);
						sub_25802E0C((int)concatURL_addr, 0);
                		v71 = (int)custom_strlen((LPCSTR)concatURL_addr);
                		v72 = concatURL_addr;
                		goto LABEL_75;
					v71 = v58;
					v72 = v59;
				} else {
					v71 = v46;
            		v72 = v47;
            		if((_DWORD)v60) goto LABEL_75;
            		*(_QWORD *)&v73 = v48;
				if((_DWORD)v73) {
					if ( (int)v61 > 0 ) *((_QWORD *)&v73 + 1) = v61;
        	    	v34 = sub_25802FAC((int)v63);
            		if(v34+1 > *a4) {
                		*a4 = v34 + 1;
                		goto LABEL_45;
    	        	*a4 = v34;
        	    	v25 = v63;
            		goto LABEL_82;
				if((int)v60 > 0) *(_QWORD *)&v73 = v60;
				goto LABEL_77;
			v26 = v55;
			v21 = v42[0];
			v27 = v51;
		v64 = v27;
		v65 = v52;
		v66 = v53;
		v67 = v54;
		v33 = v56;
		if(v62 == 1) {
			v26 += 2;
    	    v33 = v56 - 1;
    	    v55 = v26;
		v69 = v33;
		v68 = v26;
		v71 = v58;
		v70 = v57;
		v72 = v59;
		if(v57) goto LABEL_75;
		v78[1] = 0;
		if(!sub_25802C93(v21, &v78[1])) goto LABEL_75;
		v70 = v78[1];
		goto LABEL_74;
    v22 = sub_25802FAC((int)v42);
    v23 = a4;
    v24 = v22 + 1;
    // Caller's size (*a4) is too small: report the required size and status -3.
    if(v22+1 > *a4) {
		*v23 = v24;
        *a5 = -3;
        goto LABEL_86;
    *a4 = v22;
    v25 = v42;
    sub_25803194((int)v25, String);
		// The three temporary buffers are handed to the routine stored at
		// dword_25824098 + 12 (presumably the matching free routine - confirm).
		(*(void (__cdecl **)(LPCSTR))(dword_25824098 + 12))(allocadr_Source_copy);
			(*(void (__cdecl **)(LPCSTR))(dword_25824098 + 12))(allocadr_lpString_copy);
        	(*(void (__cdecl **)(wchar_t *))(dword_25824098 + 12))(concatURL_addr);
	return v5;

You need to understand the following functions to understand the root cause.

  1. ExploitPoint() : The function where the analysis starts

    • Parameter

      • wchar_t *Source : base URL
      • CHAR *lpString : relative URL
    • The steps relevant to the exploit can be summarized as follows (they are marked with matching numbers in the listing above, "Decompiled Code for the Vulnerable Function in IA32.api"):

      1. Get the length of the URLs
        [1-1] Get the length of the relative URL
        [1-2] Get the length of the base URL

      2. Store the URLs in the Heap memory
        [2-1] Store the base URL in the new Heap memory
        [2-2] Store the relative URL in the new Heap memory

      3. Perform URL-related operations (This step is not directly related to the exploit.)

      4. Allocate new Heap memory to store the connected URL

      5. Store the base URL in the Heap memory allocated at [4]

      6. Concatenate the relative URL after the base URL stored at [5] : the OOB R/W and Heap BOF occur here

  2. strlen_UTF16BE : Calculate the length of the string encoded as UTF-16BE

    • Parameter
      • char *string : UTF-16BE encoded string to calculate the length
    • return value : The number of bytes in the string, excluding the null terminator
    // Measures a string that starts with the byte pair 0xFE 0xFF (compared here
    // as the signed char values -2 and -1).
    //   string - buffer to measure
    // Returns -1 when string is NULL, when it does not start with that byte
    // pair, and on the malformed-input exits at the bottom; otherwise returns
    // length, which grows by 2 for each byte pair whose two bytes are both
    // non-zero. The walk starts at string itself, so the leading pair is counted.
    // NOTE(review): the LABEL_10 target and all closing braces are missing from
    // this listing, so the exit path taken after the goto cannot be confirmed.
    int __cdecl strlen_UTF16BE(char *string)
      char *p_string_i0; // eax
      char string_i1; // cl
      int length; // esi
      char string_i0; // bl
      char *p_string_i1; // eax
      p_string_i0 = string;
      if(!string || *string != -2 || string[1] != -1) return -1;
      string_i1 = 0;
      length = 0;
      do {
        // First byte of the current pair.
        string_i0 = *p_string_i0;
        p_string_i1 = p_string_i0 + 1;
        // Tests the pointer, not the byte it points to.
        if(!p_string_i1) break;
        // Second byte of the current pair.
        string_i1 = *p_string_i1;
        p_string_i0 = p_string_i1 + 1;
        if(!string_i0) goto LABEL_10;
        if(!string_i1) break;
        length += 2;
      } while ( p_string_i0 );
      if(string_i0) return -1;
      if(!string_i1) return length;
      else return -1;
  3. custom_strlen() : A strlen variant that also works on UTF-16BE encoded strings (shown as strlen_UTF16BE_ in the listing below)

    • Parameter
      • LPCSTR lpString : UTF-16BE encoded string whose length is calculated
    • return value : The number of bytes, excluding the null terminator
    // Counts the bytes in string, two at a time, up to the first byte pair in
    // which both bytes are zero.
    //   string - buffer to measure
    // Returns 0 for a NULL pointer, otherwise the byte offset of that zero pair.
    // This listing never inspects the first two bytes for 0xFE 0xFF, so a
    // single-byte string is scanned past its one-byte terminator until an
    // aligned pair of zero bytes is found.
    // NOTE(review): the surrounding text calls this function custom_strlen while
    // the listing is named strlen_UTF16BE_ - confirm they are the same routine.
    // The closing braces are missing from this listing.
    int __cdecl strlen_UTF16BE_(char *string)
      char *v1; // ecx
      int i; // edx
      char v4; // al
      v1 = string;
      if(!string) return 0;
      for(i = 0; ; i += 2) {
        // v4 is the first byte of the pair; after the increment, *(v1-1) is the second.
        v4 = *v1;
        v1 += 2;
        if (!v4 && !*(v1-1)) break;
      return i;
  4. calloc_guess() : Allocation function (presumed to behave like calloc in C)

  5. custom_strncpy() : A strncpy variant that also works on UTF-16BE encoded strings

    • Parameter
      • wchar_t *Destination : The address to store the copied string
      • wchar_t *Source : The string to copy
      • unsigned int iMaxLength : The maximum number of bytes to copy
    • return value : Destination
    // Bounded copy that selects a wide or a narrow copy routine from the first
    // two bytes of Source.
    //   Destination - receives the copy
    //   Source      - string to copy; a leading 0xFE 0xFF selects the wide path
    //   iMaxLength  - byte budget: halved for wcsncpy, passed as-is to lstrcpynA
    // Returns wcsncpy's result on the wide path and lstrcpynA's result otherwise.
    // If any argument is NULL/zero, a handler is called through dword_258240A4
    // and a C++ exception is thrown.
    // NOTE(review): the closing braces are missing from this listing.
    wchar_t *__cdecl custom_strncpy(wchar_t *Destination, wchar_t *Source, unsigned int iMaxLength)
      wchar_t *result; // eax
      int pExceptionObject; // [esp+Ch] [ebp-4h] BYREF
      if(!Destination || !Source || !iMaxLength) {
        // 1073741827 == 0x40000003; what the code means is not shown in this listing.
        (*(void (__thiscall **)(_DWORD, int))(dword_258240A4 + 4))(*(_DWORD *)(dword_258240A4 + 4), 1073741827);
        pExceptionObject = 0;
        CxxThrowException(&pExceptionObject, (_ThrowInfo *)&_TI1H);
      // Only Source is inspected for the byte pair; Destination is never checked.
      if(*(_BYTE *)Source == 0xFE && *((_BYTE *)Source+1) == 0xFF)
        return wcsncpy(Destination, Source, iMaxLength >> 1);
      result = (wchar_t *)lstrcpynA((LPSTR)Destination, (LPCSTR)Source, iMaxLength);
      // Force a terminator into the last byte of the budget.
      *((_BYTE *)Destination + iMaxLength - 1) = 0;
      return result;
  6. custom_strncat() : A strncat variant that also works on UTF-16BE encoded strings

    • Parameter
      • char *Destination, char *Source, int maxlength
    • Concatenates the Source string after the Destination string
    • In the normal case, the second if statement calls custom_strcat(Destination, Source);
    // Bounded concatenation of Source onto Destination.
    //   Destination - string that is appended to
    //   Source      - string to append
    //   maxlength   - limit used by the fit check below
    // Returns 1 when the combined length fits and custom_strcat was used, 0 when
    // the strncat fallback was taken.
    // If any argument is NULL/zero, a handler is called through dword_258240A4
    // and a C++ exception is thrown.
    // NOTE(review): the closing braces are missing from this listing.
    int __cdecl custom_strncat(char *Destination, char *Source, int maxlength)
      int result; // eax
      LPCSTR pExceptionObject; // [esp+10h] [ebp-4h] BYREF
      if(!Destination || !Source || !maxlength) {
        (*(void (__thiscall **)(_DWORD, int))(dword_258240A4 + 4))(*(_DWORD *)(dword_258240A4 + 4), 1073741827);
        pExceptionObject = 0;
        CxxThrowException(&pExceptionObject, (_ThrowInfo *)&_TI1H);
      // The same stack slot is reused to hold custom_strlen(Destination).
      pExceptionObject = custom_strlen(Destination);
      // The pointer-index expression is the decompiler's spelling of
      // custom_strlen(Source) + custom_strlen(Destination) <= maxlength - 1.
      if(&custom_strlen(Source)[(int)pExceptionObject] <= (const CHAR *)(maxlength-1)) {
        custom_strcat(Destination, Source);
        return 1;
      } else {
        // Too long: append only what fits and terminate at the last byte.
        strncat(Destination, Source, maxlength - (_DWORD)pExceptionObject - 1);
        result = 0;
        Destination[maxlength - 1] = 0;
      return result;

  7. custom_strcat() : A strcat variant that also works on UTF-16BE encoded strings. The vulnerability is triggered directly in this function.

    • Parameter

      • LPSTR lpString1 : base URL
      • LPCSTR lpString2 : relative URL
    • If the base URL is encoded as UTF-16BE, then this function copies 2 bytes at a time from the relative URL to the end of the base URL until it reaches "\x00\x00".

      • This means the program assumes that both lpString1 and lpString2 are encoded as UTF-16BE and proceeds with the URL concatenation.
    • If the base URL is not encoded as UTF-16BE, then this function copies 1 byte at a time from the relative URL to the end of the base URL until it reaches "\x00".

      • This means the program assumes that neither lpString1 nor lpString2 is encoded as UTF-16BE and proceeds with the URL concatenation.
    // Appends lpString2 to lpString1, choosing the copy method from the first
    // two bytes of lpString1 only.
    //   lpString1 - destination string (the base URL in this analysis)
    //   lpString2 - string to append (the relative URL in this analysis)
    // Returns lpString1.
    // If either pointer is NULL, a handler is called through dword_258240A4 and
    // a C++ exception is thrown.
    // NOTE(review): the closing braces are missing from this listing.
    LPSTR __cdecl custom_strcat(LPSTR lpString1, LPCSTR lpString2)
    	int len_lpString1; // eax
        LPCSTR p_lpString2_i2; // edx
        CHAR *concatpoint; // ecx
        CHAR lpString2_i2; // al
        CHAR lpString2_i3; // bl
        int pExceptionObject; // [esp+10h] [ebp-4h] BYREF
        if(!lpString1 || !lpString2) {
        	(*(void (__thiscall **)(_DWORD, int))(dword_258240A4 + 4))(*(_DWORD *)(dword_258240A4 + 4), 1073741827);
            pExceptionObject = 0;
            CxxThrowException(&pExceptionObject, (_ThrowInfo *)&_TI1H);
        // Two-byte path: taken when lpString1 starts with 0xFE 0xFF. lpString2 is
        // never checked for the same byte pair.
        if(*lpString1 == (CHAR)0xFE && lpString1[1] == (CHAR)0xFF) {
    		len_lpString1 = (int)custom_strlen(lpString1);
            // Reading starts two bytes into lpString2, whatever those bytes are.
            p_lpString2_i2 = lpString2 + 2;
            // Writing starts at the current end of lpString1.
            concatpoint = &lpString1[len_lpString1];
            // Copy one byte pair per iteration. The nested loops stop only after
            // copying a pair whose two bytes are both zero; no length limit is
            // applied to either buffer.
            do {
    			do {
    				lpString2_i2 = *p_lpString2_i2;
                	p_lpString2_i2 += 2;
                	*concatpoint = lpString2_i2;
                	concatpoint += 2;
                	lpString2_i3 = *(p_lpString2_i2 - 1);
                	*(concatpoint - 1) = lpString2_i3;
                } while(lpString2_i2);
            } while(lpString2_i3);
    	} else {
    		// Single-byte path: plain lstrcatA.
    		lstrcatA(lpString1, lpString2);
        return lpString1;

3. Root Cause

When a UTF-16BE encoded base URL and an ANSI encoded relative URL are concatenated,
the function in IA32.api treats both URLs as UTF-16BE encoded.

Therefore, the concatenation continues until it reaches "\x00\x00" instead of stopping at the relative URL's actual null terminator, "\x00".

This leads to the OOB R/W and Heap BOF.

[Fig 5] The schematic diagram of the Root Cause

You can see the flow of the root cause in the file "Adobe root cause.pdf".


  1. Exodus Blog

  2. More Exploits about the Adobe Acrobat Reader DC


Further Reading

  1. Encoding [UTF-16BE; ANSI]

  2. Process of JS in PDF at Adobe

  3. JS Object - SpiderMonkey

  4. Windows Heap - LFH

