# Create the workspace, build the reference xpdf 3.02 with the system
# compiler, and download a few sample PDFs used later as the seed corpus.
cd "$HOME"
# -p keeps the step re-runnable when the directory already exists.
mkdir -p fuzzing_xpdf && cd fuzzing_xpdf/
# Refresh the package index before installing, and install everything in one
# non-interactive step (build-essential was previously requested twice).
sudo apt update
sudo apt install -y build-essential gcc doxygen
wget https://dl.xpdfreader.com/old/xpdf-3.02.tar.gz
tar -xvzf xpdf-3.02.tar.gz
cd xpdf-3.02
./configure --prefix="$HOME/fuzzing_xpdf/install/"
make
make install
cd "$HOME/fuzzing_xpdf"
mkdir -p pdf_examples && cd pdf_examples
wget https://github.com/mozilla/pdf.js-sample-files/raw/master/helloworld.pdf
wget http://www.africau.edu/images/default/sample.pdf
wget https://www.melbpc.org.au/wp-content/uploads/2017/10/small-example-pdf-file.pdf
❯ $HOME/fuzzing_xpdf/install/bin/pdfinfo -box -meta $HOME/fuzzing_xpdf/pdf_examples/helloworld.pdf
Tagged: no
Pages: 1
Encrypted: no
Page size: 200 x 200 pts
MediaBox: 0.00 0.00 200.00 200.00
CropBox: 0.00 0.00 200.00 200.00
BleedBox: 0.00 0.00 200.00 200.00
TrimBox: 0.00 0.00 200.00 200.00
ArtBox: 0.00 0.00 200.00 200.00
File size: 678 bytes
Optimized: no
PDF version: 1.7

❯ ~/fuzzing_xpdf/install/bin/pdftotext ./helloworld.pdf
❯ ls
helloworld.pdf helloworld.txt small-example-pdf-file.pdf
❯ cat helloworld.txt
Hello, world!
동작을 예상해보면
%PDF-1.7
1 0 obj % entry point
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/MediaBox [ 0 0 200 200 ]
/Count 1
/Kids [ 3 0 R ]
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/Resources <<
/Font <<
/F1 4 0 R
>>
>>
/Contents 5 0 R
>>
endobj
4 0 obj
<<
/Type /Font
/Subtype /Type1
/BaseFont /Times-Roman
>>
endobj
5 0 obj % page content
<<
/Length 44
>>
stream
BT
70 50 TD
/F1 12 Tf
(Hello, world!) Tj
ET
endstream
endobj
xref
0 6
0000000000 65535 f
0000000010 00000 n
0000000079 00000 n
0000000173 00000 n
0000000301 00000 n
0000000380 00000 n
trailer
<<
/Size 6
/Root 1 0 R
>>
startxref
492
%%EOF
stream 으로 시작하고 endstream 으로 끝납니다.
참고 | PDF(Portable Document Format) File Structure Analysis
# Rebuild xpdf with AFL++ instrumentation and start fuzzing pdftotext.
# ${HOME:?} aborts the command if HOME is unset or empty, so the rm can never
# expand to a path directly under /.
rm -r -- "${HOME:?}/fuzzing_xpdf/install"
cd "$HOME/fuzzing_xpdf/xpdf-3.02/"
make clean
export LLVM_CONFIG="llvm-config-11"
# Use the AFL++ compiler wrappers so the build is instrumented.
CC="$HOME/AFLplusplus/afl-clang-fast" CXX="$HOME/AFLplusplus/afl-clang-fast++" ./configure --prefix="$HOME/fuzzing_xpdf/install/"
make
make install
# -s 123 fixes the fuzzer's RNG seed; @@ is replaced by the path of each test case.
afl-fuzz -i "$HOME/fuzzing_xpdf/pdf_examples/" -o "$HOME/fuzzing_xpdf/out/" -s 123 -- "$HOME/fuzzing_xpdf/install/bin/pdftotext" @@ "$HOME/fuzzing_xpdf/output"
❯ pwd
/home/pwn/Fuzzing101/fuzzing_xpdf/out/default/crashes
❯ ls
crash1 crash11.txt crash3.txt crash5.txt crash7.txt crash9.txt
crash10 crash1.txt crash4 crash6 crash8 pdftotext
crash10.txt crash2 crash4.txt crash6.txt crash8.txt
crash11 crash3 crash5 crash7 crash9
gdb-gef --args ~/fuzzing_xpdf/install/bin/pdftotext ./crash1
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Program received signal SIGSEGV, Segmentation fault.
__vfprintf_internal (s=s@entry=0x7fffff7ff590, format=0x50d780 "Error (%d): ", ap=0x7fffff801c50, mode_flags=0x0) at vfprintf-internal.c:1365
1365 vfprintf-internal.c: No such file or directory.
[ Legend: Modified register | Code | Heap | Stack | String ]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── registers ────
$rax : 0x00007fffff801c70 → 0x00007ffff7b955c0 → 0x00000000fbad2887
$rbx : 0x0
$rcx : 0x0
$rdx : 0x00007fffff801c50 → 0x0000003000000010
$rsp : 0x7fffff7feff0
$rbp : 0x00007fffff7ff560 → 0x00007fffff801c40 → 0x00000000000001a8
$rsi : 0x25
$rdi : 0x000000000050d780 → "Error (%d): "
$rip : 0x00007ffff7a1e900 → <__vfprintf_internal+00a0> call 0x7ffff79ca390 <*ABS*+0xa1c90@plt>
$r8 : 0x0
$r9 : 0x4
$r10 : 0x000000000050d789 → 0x6f72724500203a29 ("): "?)
$r11 : 0x00007ffff7b94be0 → 0x0000000002b73570 → 0x0000000000000000
$r12 : 0x00007fffff7ff590 → 0x00000000fbad8004
$r13 : 0x000000000050d780 → "Error (%d): "
$r14 : 0x00007fffff801c50 → 0x0000003000000010
$r15 : 0xfbad8004
$eflags: [ZERO carry PARITY adjust sign trap INTERRUPT direction overflow RESUME virtualx86 identification]
$cs: 0x33 $ss: 0x2b $ds: 0x00 $es: 0x00 $fs: 0x00 $gs: 0x00
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── stack ────
[!] Unmapped address: '0x7fffff7feff0'
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── code:x86:64 ────
0x7ffff7a1e8ef <__vfprintf_internal+008f> mov rdi, r13
0x7ffff7a1e8f2 <__vfprintf_internal+0092> mov QWORD PTR [rbp-0x438], rax
0x7ffff7a1e8f9 <__vfprintf_internal+0099> movups XMMWORD PTR [rbp-0x448], xmm1
→ 0x7ffff7a1e900 <__vfprintf_internal+00a0> call 0x7ffff79ca390 <*ABS*+0xa1c90@plt>
↳ 0x7ffff79ca390 <*ABS*+0xa1c90@plt+0000> endbr64
0x7ffff79ca394 <*ABS*+0xa1c90@plt+0004> bnd jmp QWORD PTR [rip+0x1c9ca5] # 0x7ffff7b94040 <*ABS*@got.plt>
0x7ffff79ca39b <*ABS*+0xa1c90@plt+000b> nop DWORD PTR [rax+rax*1+0x0]
0x7ffff79ca3a0 <*ABS*+0xbcd20@plt+0000> endbr64
0x7ffff79ca3a4 <*ABS*+0xbcd20@plt+0004> bnd jmp QWORD PTR [rip+0x1c9c9d] # 0x7ffff7b94048 <*ABS*@got.plt>
0x7ffff79ca3ab <*ABS*+0xbcd20@plt+000b> nop DWORD PTR [rax+rax*1+0x0]
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── arguments (guessed) ────
*ABS*+0xa1c90@plt (
$rdi = 0x000000000050d780 → "Error (%d): ",
$rsi = 0x0000000000000025,
$rdx = 0x00007fffff801c50 → 0x0000003000000010,
$rcx = 0x0000000000000000
)
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── threads ────
[#0] Id 1, Name: "pdftotext", stopped 0x7ffff7a1e900 in __vfprintf_internal (), reason: SIGSEGV
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── trace ────
[#0] 0x7ffff7a1e900 → __vfprintf_internal(s=0x7fffff7ff590, format=0x50d780 "Error (%d): ", ap=0x7fffff801c50, mode_flags=0x0)
[#1] 0x7ffff7a21ea2 → buffered_vfprintf(s=0x7ffff7b955c0 <_IO_2_1_stderr_>, format=0x50d780 "Error (%d): ", args=0x7fffff801c50, mode_flags=0x0)
[#2] 0x7ffff7a1ed24 → __vfprintf_internal(s=0x7ffff7b955c0 <_IO_2_1_stderr_>, format=0x50d780 "Error (%d): ", ap=0x7fffff801c50, mode_flags=0x0)
[#3] 0x7ffff7a09c6a → __fprintf(stream=<optimized out>, format=<optimized out>)
[#4] 0x43a046 → error(pos=0x1a8, msg=0x514f8f "Dictionary key must be a name object")
[#5] 0x4ab4b2 → Parser::getObj(this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0)
[#6] 0x4d5deb → XRef::fetch(this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff801f90)
[#7] 0x4abc0d → Object::dictLookup(this=0x7fffff802140, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff801f90)
[#8] 0x4abc0d → Parser::makeStream(this=0x2b72a60, dict=0x7fffff802140, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0)
[#9] 0x4ab83a → Parser::getObj(this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0)
─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Parser::getObj() 함수가 보입니다.
gef➤ bt
.
.
.
#4325 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4326 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff873e10) at XRef.cc:823
#4327 0x00000000004abc0d in Object::dictLookup (this=0x7fffff873fc0, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff873e10) at ./Object.h:253
#4328 Parser::makeStream (this=this@entry=0x29f6f60, dict=dict@entry=0x7fffff873fc0, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4329 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4330 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff873fc0) at XRef.cc:823
#4331 0x00000000004abc0d in Object::dictLookup (this=0x7fffff874170, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff873fc0) at ./Object.h:253
#4332 Parser::makeStream (this=this@entry=0x29f69c0, dict=dict@entry=0x7fffff874170, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4333 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4334 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff874170) at XRef.cc:823
#4335 0x00000000004abc0d in Object::dictLookup (this=0x7fffff874320, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff874170) at ./Object.h:253
#4336 Parser::makeStream (this=this@entry=0x29f6420, dict=dict@entry=0x7fffff874320, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4337 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4338 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff874320) at XRef.cc:823
#4339 0x00000000004abc0d in Object::dictLookup (this=0x7fffff8744d0, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff874320) at ./Object.h:253
#4340 Parser::makeStream (this=this@entry=0x29f5e80, dict=dict@entry=0x7fffff8744d0, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4325 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4326 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff873e10) at XRef.cc:823
#4327 0x00000000004abc0d in Object::dictLookup (this=0x7fffff873fc0, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff873e10) at ./Object.h:253
#4328 Parser::makeStream (this=this@entry=0x29f6f60, dict=dict@entry=0x7fffff873fc0, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4329 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4330 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff873fc0) at XRef.cc:823
#4331 0x00000000004abc0d in Object::dictLookup (this=0x7fffff874170, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff873fc0) at ./Object.h:253
#4332 Parser::makeStream (this=this@entry=0x29f69c0, dict=dict@entry=0x7fffff874170, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4333 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4334 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff874170) at XRef.cc:823
#4335 0x00000000004abc0d in Object::dictLookup (this=0x7fffff874320, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff874170) at ./Object.h:253
#4336 Parser::makeStream (this=this@entry=0x29f6420, dict=dict@entry=0x7fffff874320, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#4337 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#4338 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffff874320) at XRef.cc:823
#4339 0x00000000004abc0d in Object::dictLookup (this=0x7fffff8744d0, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffff874320) at ./Object.h:253
#4340 Parser::makeStream (this=this@entry=0x29f5e80, dict=dict@entry=0x7fffff8744d0, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
backtrace 를 찍고 함수 호출 흐름을 확인해보면 Parser::getObj → Parser::makeStream → Object::dictLookup → XRef::fetch → Parser::getObj 순서로 같은 호출이 반복됩니다.
gef➤ bt -20
#77503 0x00000000004abc0d in Object::dictLookup (this=0x7fffffffd6e0, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffffffd530) at ./Object.h:253
#77504 Parser::makeStream (this=this@entry=0x10c7830, dict=dict@entry=0x7fffffffd6e0, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#77505 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#77506 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffffffd6e0) at XRef.cc:823
#77507 0x00000000004abc0d in Object::dictLookup (this=0x7fffffffd890, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffffffd6e0) at ./Object.h:253
#77508 Parser::makeStream (this=this@entry=0x10c73f0, dict=dict@entry=0x7fffffffd890, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#77509 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#77510 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffffffd890) at XRef.cc:823
#77511 0x00000000004abc0d in Object::dictLookup (this=0x7fffffffda08, key=0x25 <error: Cannot access memory at address 0x25>, obj=0x7fffffffd890) at ./Object.h:253
#77512 Parser::makeStream (this=this@entry=0x10c7320, dict=dict@entry=0x7fffffffda08, fileKey=fileKey@entry=0x0, encAlgorithm=encAlgorithm@entry=cryptRC4, keyLength=keyLength@entry=0x0, objNum=objNum@entry=0x4, objGen=0x0) at Parser.cc:156
#77513 0x00000000004ab83a in Parser::getObj (this=<optimized out>, obj=<optimized out>, fileKey=0x0, encAlgorithm=cryptRC4, keyLength=0x0, objNum=0x4, objGen=0x0) at Parser.cc:94
#77514 0x00000000004d5deb in XRef::fetch (this=0x10b8a40, num=0x4, gen=0x0, obj=0x7fffffffda08) at XRef.cc:823
#77515 0x000000000045decd in GfxFontDict::GfxFontDict (this=0x10c6dc0, xref=<optimized out>, fontDictRef=<optimized out>, fontDict=0x10c6bd0) at GfxFont.cc:1518
#77516 0x000000000044a2c6 in GfxResources::GfxResources (this=this@entry=0x1099330, xref=0x10b8a40, resDict=resDict@entry=0x10c6b80, nextA=nextA@entry=0x0) at Gfx.cc:282
#77517 0x000000000044abd8 in Gfx::Gfx (this=0x10c7280, xrefA=<optimized out>, outA=0x10c6f10, pageNum=0x1, resDict=0x7fffff801c50, hDPI=72, vDPI=72, box=0x7fffffffdbd0, cropBox=0x0, rotate=0x0, abortCheckCbkA=0x0, abortCheckCbkDataA=0x0) at Gfx.cc:449
#77518 0x00000000004a9fba in Page::displaySlice (this=this@entry=0x10c5200, out=0x7fffff801c50, out@entry=0x10c6f10, hDPI=<optimized out>, hDPI@entry=72, vDPI=<optimized out>, vDPI@entry=72, rotate=<optimized out>, rotate@entry=0x0, useMediaBox=<optimized out>, useMediaBox@entry=0x0, crop=<optimized out>, crop@entry=0x1, sliceX=<optimized out>, sliceX@entry=0xffffffff, sliceY=<optimized out>, sliceW=<optimized out>, sliceH=<optimized out>, printing=<optimized out>, catalog=<optimized out>, abortCheckCbk=<optimized out>, abortCheckCbkData=<optimized out>) at Page.cc:311
#77519 0x00000000004a9d24 in Page::display (this=0x50d780, out=0x25, hDPI=72, vDPI=72, rotate=0xff801c50, useMediaBox=0x0, crop=0x0, printing=0x0, catalog=0x10c6240, abortCheckCbk=0x0, abortCheckCbkData=0x0) at Page.cc:264
#77520 0x00000000004acb8a in PDFDoc::displayPage (this=0x10c4040, out=0x10c6f10, page=<optimized out>, hDPI=<optimized out>, vDPI=<optimized out>, rotate=0x0, useMediaBox=<optimized out>, crop=<optimized out>, printing=<optimized out>, abortCheckCbk=<optimized out>, abortCheckCbkData=<optimized out>) at PDFDoc.cc:317
#77521 PDFDoc::displayPages (this=0x10c4040, out=0x10c6f10, firstPage=<optimized out>, lastPage=<optimized out>, hDPI=<optimized out>, vDPI=<optimized out>, rotate=0x0, useMediaBox=<optimized out>, crop=<optimized out>, printing=<optimized out>, abortCheckCbk=<optimized out>, abortCheckCbkData=<optimized out>) at PDFDoc.cc:330
#77522 0x00000000004d8fce in main (argc=<optimized out>, argv=<optimized out>) at pdftotext.cc:237
main → PDFDoc::displayPages → PDFDoc::displayPage → Page::display → Page::displaySlice → Gfx::Gfx → GfxResources::GfxResources → GfxFontDict::GfxFontDict → XRef::fetch → Parser::getObj 다음부터 무한 재귀 호출이 시작되고 있습니다.
main 부터 차례대로 분석해보겠습니다.
주석을 기반으로 main 함수의 실행 흐름을 살펴보겠습니다.
graph TD
parse_args --> read_config_file
read_config_file --> mapping_encoding
mapping_encoding --> open_PDF
open_PDF --> check_for_copy_permission
check_for_copy_permission --> get_page_range
get_page_range --> write_HTML_Header
write_HTML_Header --> write_Text_File
main 함수는 위와 같은 흐름으로 진행됩니다.
텍스트 파일을 쓰는 단계에서 PDFDoc::displayPages 함수를 호출합니다.
// write text file
textOut = new TextOutputDev(textFileName->getCString(),
physLayout, rawOrder, htmlMeta);
if (textOut->isOk()) {
doc->displayPages(textOut, firstPage, lastPage, 72, 72, 0,
gFalse, gTrue, gFalse);
} else {
delete textOut;
exitCode = 2;
goto err3;
}
delete textOut;
// Render every page in the inclusive range [firstPage, lastPage] to `out`.
// All rendering options and the abort-check callback (with its data pointer)
// are forwarded unchanged to displayPage() for each page.
void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
double hDPI, double vDPI, int rotate,
GBool useMediaBox, GBool crop, GBool printing,
GBool (*abortCheckCbk)(void *data),
void *abortCheckCbkData) {
int page;
// the bound is inclusive: lastPage itself is displayed
for (page = firstPage; page <= lastPage; ++page) {
displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
abortCheckCbk, abortCheckCbkData);
}
}
displayPages 함수는 각 페이지마다 displayPage 함수를 호출합니다.
displayPage 함수가 무슨 기능을 하는지 살펴보겠습니다.
void PDFDoc::displayPage(OutputDev *out, int page,
double hDPI, double vDPI, int rotate,
GBool useMediaBox, GBool crop, GBool printing,
GBool (*abortCheckCbk)(void *data),
void *abortCheckCbkData) {
if (globalParams->getPrintCommands()) {
printf("***** page %d *****\n", page);
}
catalog->getPage(page)->display(out, hDPI, vDPI,
rotate, useMediaBox, crop, printing, catalog,
abortCheckCbk, abortCheckCbkData);
}
display 함수로 가서 좀 더 자세하게 살펴봐야 할 것 같습니다.
void Page::display(OutputDev *out, double hDPI, double vDPI,
int rotate, GBool useMediaBox, GBool crop,
GBool printing, Catalog *catalog,
GBool (*abortCheckCbk)(void *data),
void *abortCheckCbkData) {
displaySlice(out, hDPI, vDPI, rotate, useMediaBox, crop,
-1, -1, -1, -1, printing, catalog,
abortCheckCbk, abortCheckCbkData);
}
displaySlice 함수를 살펴봐야 할 것 같습니다.
void Page::displaySlice(OutputDev *out, double hDPI, double vDPI,
int rotate, GBool useMediaBox, GBool crop,
int sliceX, int sliceY, int sliceW, int sliceH,
GBool printing, Catalog *catalog,
GBool (*abortCheckCbk)(void *data),
void *abortCheckCbkData) {
Page 클래스의 displaySlice 메서드로, 페이지의 특정 슬라이스를 출력하는 기능을 수행합니다. 출력 장치, DPI, 회전 각도, 미디어 박스 사용 여부, 자르기 여부, 슬라이스의 위치와 크기, 인쇄 여부 등을 매개변수로 받습니다.
PDFRectangle *mediaBox, *cropBox;
PDFRectangle box;
Gfx *gfx;
Object obj;
Annots *annotList;
Dict *acroForm;
int i;
mediaBox, cropBox는 페이지의 박스를 나타내고, gfx는 그래픽 상태를 처리하는 객체입니다. obj, annotList, acroForm은 각각 PDF 객체, 주석 리스트, 아크로폼을 나타냅니다.
if (!out->checkPageSlice(this, hDPI, vDPI, rotate, useMediaBox, crop,
sliceX, sliceY, sliceW, sliceH,
printing, catalog,
abortCheckCbk, abortCheckCbkData)) {
return;
}
out 객체의 checkPageSlice 메서드를 호출하여 슬라이스가 유효한지 확인합니다. 유효하지 않으면 함수를 종료합니다.
rotate += getRotate();
if (rotate >= 360) {
rotate -= 360;
} else if (rotate < 0) {
rotate += 360;
}
makeBox(hDPI, vDPI, rotate, useMediaBox, out->upsideDown(),
sliceX, sliceY, sliceW, sliceH, &box, &crop);
makeBox 메서드를 호출하여 출력할 영역의 박스를 설정합니다. 이 박스는 DPI와 슬라이스 정보를 기반으로 합니다.
cropBox = getCropBox();
if (globalParams->getPrintCommands()) {
mediaBox = getMediaBox();
printf("***** MediaBox = ll:%g,%g ur:%g,%g\n",
mediaBox->x1, mediaBox->y1, mediaBox->x2, mediaBox->y2);
printf("***** CropBox = ll:%g,%g ur:%g,%g\n",
cropBox->x1, cropBox->y1, cropBox->x2, cropBox->y2);
printf("***** Rotate = %d\n", attrs->getRotate());
}
gfx = new Gfx(xref, out, num, attrs->getResourceDict(),
hDPI, vDPI, &box, crop ? cropBox : (PDFRectangle *)NULL,
rotate, abortCheckCbk, abortCheckCbkData);
Gfx 객체를 생성하여 페이지의 그래픽 상태를 설정합니다. crop 이 참일 때만 cropBox 를 전달하고, 그렇지 않으면 NULL 을 전달합니다.
contents.fetch(xref, &obj);
if (!obj.isNull()) {
gfx->saveState();
gfx->display(&obj);
gfx->restoreState();
}
obj.free();
➡️ display 가 진짜로 화면에 출력하는 기능인거 같습니다.
지금부터는 화면 출력을 위한 그래픽 처리 코드는 제외하고 버그 관련 코드에 집중해서 분석을 해보겠습니다.
Gfx::Gfx(XRef *xrefA, OutputDev *outA, int pageNum, Dict *resDict,
double hDPI, double vDPI, PDFRectangle *box,
PDFRectangle *cropBox, int rotate,
GBool (*abortCheckCbkA)(void *data),
void *abortCheckCbkDataA) {
int i;
xref = xrefA;
subPage = gFalse;
printCommands = globalParams->getPrintCommands();
// start the resource stack
res = new GfxResources(xref, resDict, NULL);
GfxResources 객체를 생성하여 그래픽 리소스를 관리합니다.
GfxResources::GfxResources(XRef *xref, Dict *resDict, GfxResources *nextA) {
Object obj1, obj2;
Ref r;
if (resDict) {
// build font dictionary
fonts = NULL;
resDict->lookupNF("Font", &obj1);
리소스 딕셔너리의 /Font 항목 값을 obj1 에 저장하는 코드입니다.
if (obj1.isRef())
{
obj1.fetch(xref, &obj2);
if (obj2.isDict())
{
r = obj1.getRef();
fonts = new GfxFontDict(xref, &r, obj2.getDict());
}
obj2.free();
}
obj1 이 간접 참조 객체라면 참조값을 가져와야 합니다.
fetch 함수가 xref 테이블을 참조하여 참조된 객체를 obj2 에 저장합니다.
obj2 가 딕셔너리 객체인 것이 확인되면 obj1 의 참조(Ref) 값을 r 에 저장합니다.
GfxFontDict 객체를 만들어 fonts 에 저장합니다.
obj2 객체는 해제합니다.
else if (obj1.isDict())
{
fonts = new GfxFontDict(xref, NULL, obj1.getDict());
}
obj1.free();
obj1 이 딕셔너리 객체이면 obj1 의 딕셔너리로 바로 GfxFontDict 객체를 만들고 fonts 변수에 저장합니다.
obj1 객체는 해제해줍니다.
GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict)
{
int i;
Object obj1, obj2;
Ref r;
numFonts = fontDict->getLength();
fonts = (GfxFont **)gmallocn(numFonts, sizeof(GfxFont *));
for (i = 0; i < numFonts; ++i)
{
fontDict 의 항목 개수를 구하고, 그 개수만큼 GfxFont 포인터 배열을 할당하여 fonts 에 대입합니다.
fontDict->getValNF(i, &obj1);
obj1.fetch(xref, &obj2);
if (obj2.isDict())
{
if (obj1.isRef())
{
r = obj1.getRef();
}
else
{
// no indirect reference for this font, so invent a unique one
// (legal generation numbers are five digits, so any 6-digit
// number would be safe)
r.num = i;
if (fontDictRef)
{
r.gen = 100000 + fontDictRef->num;
}
else
{
r.gen = 999999;
}
}
fonts[i] = GfxFont::makeFont(xref, fontDict->getKey(i),
r, obj2.getDict());
if (fonts[i] && !fonts[i]->isOk())
{
delete fonts[i];
fonts[i] = NULL;
}
}
fontDict 에서 i 번째 값을 가져와 obj1 에 저장하고, obj1 이 간접 참조형이면 참조된 객체를 obj2 에 저장합니다.
obj2 가 딕셔너리이고 obj1 이 간접 참조이면 r 에 obj1 의 참조값을 저장합니다.
obj1 이 간접 참조가 아닌 경우에는 고유한 참조값을 임의로 만들어 r 에 저장합니다.
else
{
error(-1, "font resource is not a dictionary");
fonts[i] = NULL;
}
obj1.free();
obj2.free();
obj2 가 딕셔너리가 아니면 에러를 발생시킵니다.
Object *XRef::fetch(int num, int gen, Object *obj)
{
XRefEntry *e;
Parser *parser;
Object obj1, obj2, obj3;
// check for bogus ref - this can happen in corrupted PDF files
if (num < 0 || num >= size)
{
goto err;
}
e = &entries[num];
switch (e->type)
{
case xrefEntryUncompressed:
if (e->gen != gen)
{
goto err;
}
obj1.initNull();
parser = new Parser(this,
new Lexer(this,
str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
gTrue);
parser->getObj(&obj1);
parser->getObj(&obj2);
parser->getObj(&obj3);
Parser와 Lexer를 생성해서 스트림 객체를 읽기 위한 준비를 하고 parser를 통해 각 객체에 해당하는 값을 가져옵니다.
obj 는 간접 참조 객체이기 때문에, 파서를 이용해서 참조 값을 가져오는 기능을 하는 함수인 것 같습니다.
Object *Parser::getObj(Object *obj, Guchar *fileKey,
CryptAlgorithm encAlgorithm, int keyLength,
int objNum, int objGen)
{
char *key;
Stream *str;
Object obj2;
int num;
DecryptStream *decrypt;
GString *s, *s2;
int c;
// refill buffer after inline image data
if (inlineImg == 2)
{
buf1.free();
buf2.free();
lexer->getObj(&buf1);
lexer->getObj(&buf2);
inlineImg = 0;
}
// array
if (buf1.isCmd("["))
{
shift();
obj->initArray(xref);
while (!buf1.isCmd("]") && !buf1.isEOF())
obj->arrayAdd(getObj(&obj2, fileKey, encAlgorithm, keyLength,
objNum, objGen));
if (buf1.isEOF())
error(getPos(), "End of file inside array");
shift();
// dictionary or stream
}
배열은 [ 로 시작해서 여러 개의 데이터가 오고 ] 로 끝납니다.
[ 가 나오면 obj 객체를 배열로 초기화하고, ] 가 나올 때까지 배열에 객체를 계속 추가합니다.
// dictionary or stream
}
else if (buf1.isCmd("<<"))
{
shift();
obj->initDict(xref);
while (!buf1.isCmd(">>") && !buf1.isEOF())
{
if (!buf1.isName())
{
error(getPos(), "Dictionary key must be a name object");
shift();
}
else
{
key = copyString(buf1.getName());
shift();
if (buf1.isEOF() || buf1.isError())
{
gfree(key);
break;
}
obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength,
objNum, objGen));
}
}
if (buf1.isEOF())
error(getPos(), "End of file inside dictionary");
딕셔너리는 << 로 시작해서 >> 로 끝납니다.
<< 를 만나면 obj 객체를 딕셔너리로 초기화합니다.
>> 가 나오거나 EOF(파일 끝)가 나올 때까지 반복문을 돌며 key를 가져오고 key에 해당하는 값을 가져와 딕셔너리에 추가합니다.
buf1 이 EOF 면 에러를 발생시킵니다.
// stream objects are not allowed inside content streams or
// object streams
if (allowStreams && buf2.isCmd("stream"))
{
if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
objNum, objGen)))
{
obj->initStream(str);
}
else
{
obj->free();
obj->initError();
}
}
else
{
shift();
}
스트림은 stream 으로 시작하고 endstream 으로 끝납니다.
➡️ getObj 함수는 각 객체 내부의 값이나 객체들을 하나로 모으는 함수인 것 같습니다.
Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
CryptAlgorithm encAlgorithm, int keyLength,
int objNum, int objGen)
{
Object obj;
BaseStream *baseStr;
Stream *str;
Guint pos, endPos, length;
// get stream start position
lexer->skipToNextLine();
pos = lexer->getPos();
// get length
dict->dictLookup("Length", &obj);
if (obj.isInt())
{
length = (Guint)obj.getInt();
obj.free();
}
else
{
error(getPos(), "Bad 'Length' attribute in stream");
obj.free();
return NULL;
}
Length 값이 정수이면 length 에 길이를 저장하고 obj 객체를 해제합니다.
정수가 아니면 에러를 출력하고 obj 객체를 해제한 뒤 NULL 을 반환합니다.
// check for length in damaged file
if (xref && xref->getStreamEnd(pos, &endPos))
{
length = endPos - pos;
}
getStreamEnd 함수를 이용해서 endPos 에 스트림의 끝 위치를 저장합니다.
성공하면 endPos - pos 값을 length 변수에 대입합니다.
// in badly damaged PDF files, we can run off the end of the input
// stream immediately after the "stream" token
if (!lexer->getStream())
{
return NULL;
}
baseStr = lexer->getStream()->getBaseStream();
// skip over stream data
lexer->setPos(pos + length);
setPos 함수를 통해 스트림 데이터 부분을 건너뜁니다.
// refill token buffers and check for 'endstream'
shift(); // kill '>>'
shift(); // kill 'stream'
if (buf1.isCmd("endstream"))
{
shift();
}
else
{
error(getPos(), "Missing 'endstream'");
// kludge for broken PDF files: just add 5k to the length, and
// hope its enough
length += 5000;
}
endstream 이 있는지 확인하고 없으면 length 에 5000을 더해서 임의로 처리합니다.
// make base stream
str = baseStr->makeSubStream(pos, gTrue, length, dict);
// handle decryption
if (fileKey)
{
str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
objNum, objGen);
}
// get filters
str = str->addFilters(dict);
return str;
}
fileKey 가 존재할 때 스트림을 복호화합니다.
addFilters 함수로 필터를 str 에 추가합니다.
str 를 리턴합니다.
crash
5 0 obj % page contZnt
<<
/Length 4 0 R
44
>>
stream
BT
70 50 TD
/F1 12 Tf
(Hello, world!) Tj
ET
endstream
endobj
/Length 키가 존재하고 값은 4 0 R 로 간접 참조 형태입니다.
dictLookup() 함수가 호출될 것 같습니다.
inline Object *Object::dictLookup(char *key, Object *obj)
{ return dict->lookup(key, obj); }
// Look up `key` in this dictionary and write the result into `obj`.
// If find(key) yields an entry, its value is passed through
// fetch(xref, obj); otherwise `obj` is set via initNull().
// Returns the pointer produced by whichever of those two calls ran.
Object *Dict::lookup(char *key, Object *obj) {
DictEntry *e;
// the assignment inside the condition is intentional: e holds the result of find()
return (e = find(key)) ? e->val.fetch(xref, obj) : obj->initNull();
}
key 에 해당하는 키가 존재하면 fetch 함수를 통해 값을 반환합니다.
존재하지 않으면 initNull 함수를 통해 null 객체를 반환합니다.
Object *Object::fetch(XRef *xref, Object *obj) {
return (type == objRef && xref) ?
xref->fetch(ref.num, ref.gen, obj) : copy(obj);
}
Object *XRef::fetch(int num, int gen, Object *obj)
{
XRefEntry *e;
Parser *parser;
Object obj1, obj2, obj3;
// check for bogus ref - this can happen in corrupted PDF files
if (num < 0 || num >= size)
{
goto err;
}
e = &entries[num];
num 이 유효한 범위가 아니면 에러를 처리하는 곳으로 점프합니다.
num에 해당하는 XRefEntry를 e에 설정합니다.
switch (e->type)
{
case xrefEntryUncompressed:
if (e->gen != gen)
{
goto err;
}
obj1.initNull();
parser = new Parser(this,
new Lexer(this,
str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
gTrue);
parser->getObj(&obj1);
parser->getObj(&obj2);
parser->getObj(&obj3);
if (!obj1.isInt() || obj1.getInt() != num ||
!obj2.isInt() || obj2.getInt() != gen ||
!obj3.isCmd("obj"))
{
obj1.free();
obj2.free();
obj3.free();
delete parser;
goto err;
}
parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL,
encAlgorithm, keyLength, num, gen);
obj1.free();
obj2.free();
obj3.free();
delete parser;
break;
엔트리의 gen 이 인자 gen과 일치하는지 확인합니다. 일치하지 않으면 오류 처리로 이동합니다.
obj1 을 초기화 하고 Parser와 Lexer를 생성하여 스트림 객체를 읽기 위한 준비를 합니다.
Parser 의 getObj 함수를 통해 3개의 객체를 가져옵니다.
첫 번째 객체가 num과 일치하고, 두 번째 객체가 gen과 일치하며, 세 번째 객체가 "obj" 인지 확인합니다.
조건에 맞지 않으면 obj1, obj2, obj3 를 해제하고 parser 를 삭제한 뒤 에러를 처리하는 곳으로 점프합니다.
조건에 맞으면 getObj 로 obj 를 얻어옵니다. 암호화 되어 있는 경우 fileKey 를 사용합니다.
obj1, obj2, obj3 를 해제하고 parser를 삭제한 후 break를 합니다.
case xrefEntryCompressed:
if (gen != 0)
{
goto err;
}
if (!objStr || objStr->getObjStrNum() != (int)e->offset)
{
if (objStr)
{
delete objStr;
}
objStr = new ObjectStream(this, e->offset);
}
objStr->getObject(e->gen, num, obj);
break;
gen 이 0이 아니면 에러를 처리하는 곳으로 점프합니다.
objStr 이 없거나 객체 스트림 번호가 offset 과 다르다면, 기존 스트림을 해제하고 새로운 ObjectStream을 생성합니다.
objStr 에서 객체를 얻어오고 break를 합니다.
default:
goto err;
}
return obj;
err:
return obj->initNull();
}
정상적인 경우 obj 를 리턴합니다.
에러인 경우 obj 를 initNull() 함수를 통해 null 객체로 초기화하여 리턴합니다.
crash
4 0 obj
<<
/Type �� /Font
/Subtype /Type1
/BaseFont /Times-Roman
>>
endobj
5 0 obj % page contZnt
<<
/Length 4 0 R
44
>>
간접 참조된 객체는 getObj 함수를 통해 얻어와야 합니다.
// dictionary or stream
} else if (buf1.isCmd("<<")) {
shift();
obj->initDict(xref);
while (!buf1.isCmd(">>") && !buf1.isEOF()) {
if (!buf1.isName()) {
error(getPos(), "Dictionary key must be a name object");
shift();
} else {
key = copyString(buf1.getName());
shift();
if (buf1.isEOF() || buf1.isError()) {
gfree(key);
break;
}
getObj 함수가 실행되어 객체를 파싱하려고 하면 유효하지 않은 바이트(��)로 인해 에러가 발생합니다.
name object 는 / 를 통해 구분됩니다.
하지만 crash 파일을 보면
4 0 obj
<<
/Type �� /Font
/Subtype /Type1
/BaseFont /Times-Roman
>>
endobj
/Type 다음에 name object 형식에 맞지 않는 키 값이 들어가 있습니다.
그래서 error(getPos(), "Dictionary key must be a name object"); 가 실행됩니다.
그리고 나서 다시 makeStream 함수가 호출되고 무한 재귀 호출 루프를 돌게 됩니다.
// stream objects are not allowed inside content streams or
// object streams
if (allowStreams && buf2.isCmd("stream")) {
if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
objNum, objGen))) {
obj->initStream(str);
} else {
obj->free();
obj->initError();
}
// Resolve the indirect reference (num, gen) and write the referenced object
// into `obj`. Every validation failure jumps to `err`, where `obj` is set via
// initNull(); otherwise `obj` itself is returned.
//
// NOTE(review): nothing in this function limits re-entrancy. The gdb
// backtrace in this write-up shows fetch(num=4) reaching itself again through
// Parser::getObj -> Parser::makeStream -> Object::dictLookup until the stack
// is exhausted.
Object *XRef::fetch(int num, int gen, Object *obj) {
XRefEntry *e;
Parser *parser;
Object obj1, obj2, obj3;
// check for bogus ref - this can happen in corrupted PDF files
if (num < 0 || num >= size) {
goto err;
}
e = &entries[num];
switch (e->type) {
case xrefEntryUncompressed:
// the generation stored in the xref entry must match the requested one
if (e->gen != gen) {
goto err;
}
obj1.initNull();
// parse from a sub-stream that begins at this entry's offset
parser = new Parser(this,
new Lexer(this,
str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
gTrue);
// read the three header tokens; they are validated below as "<num> <gen> obj"
parser->getObj(&obj1);
parser->getObj(&obj2);
parser->getObj(&obj3);
if (!obj1.isInt() || obj1.getInt() != num ||
!obj2.isInt() || obj2.getInt() != gen ||
!obj3.isCmd("obj")) {
// header mismatch: release the temporaries and the parser before bailing out
obj1.free();
obj2.free();
obj3.free();
delete parser;
goto err;
}
// parse the object body into the caller's `obj`; the file key is passed
// only when `encrypted` is set
parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL,
encAlgorithm, keyLength, num, gen);
obj1.free();
obj2.free();
obj3.free();
delete parser;
break;
case xrefEntryCompressed:
// this branch only accepts generation 0
if (gen != 0) {
goto err;
}
// keep the current ObjectStream while its number equals e->offset;
// otherwise replace it with one constructed from e->offset
if (!objStr || objStr->getObjStrNum() != (int)e->offset) {
if (objStr) {
delete objStr;
}
objStr = new ObjectStream(this, e->offset);
}
// in this branch e->gen is passed as the first argument to getObject()
objStr->getObject(e->gen, num, obj);
break;
default:
goto err;
}
return obj;
err:
// shared failure path: hand back a null object
return obj->initNull();
}
obj 값은 fetch 함수에서 참조에 실패한 게 아니면 NULL 로 만들지 않기 때문에, 이전에 makeStream 함수를 호출할 때와 동일합니다. 따라서 다시 stream 의 길이(Length) 딕셔너리 객체를 찾게 되고 Dictionary key must be a name object 에러가 발생하는 것이 반복됩니다. 이에 대한 근거로 콘솔에 해당 에러가 엄청나게 찍혀 있습니다.
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
Error (459): Dictionary key must be a name object
Error (477): Dictionary key must be a name object
Error (424): Dictionary key must be a name object
Error (426): Dictionary key must be a name object
Error (430): Dictionary key must be a name object
Error (449): Dictionary key must be a name object
.
.
.
전역변수로 재귀호출을 카운트하는 변수를 선언하고 해당 변수 값이 1000 이상이 되면 재귀호출을 발생시키는 조건문들이 실행되지 못하도록 패치를 하겠습니다.
// Current nesting depth of Parser::getObj calls.  It is a file-scope global,
// so the depth is shared by every Parser instance; the runaway recursion
// described in this write-up passes through makeStream() and XRef::fetch(),
// and fetch() constructs a new Parser for each object it reads.
int recursionCount = 0;

// Parse the next object from the token stream into *obj and return obj.
//
//   obj                               - receives the parsed object
//   fileKey, encAlgorithm, keyLength  - decryption parameters; strings are
//                                       decrypted only when fileKey is
//                                       non-NULL
//   objNum, objGen                    - number/generation of the enclosing
//                                       object, forwarded to makeStream()
//                                       and DecryptStream
//
// Recursion guard: recursionCount is incremented on entry and decremented on
// exit, so it measures how deeply getObj calls are nested at this moment (not
// how many calls have been made in total).  Once the depth reaches
// recursionLimit, "[" and "<<" are no longer expanded into arrays or
// dictionaries; they fall through to the "simple object" branch and are
// copied as plain tokens, which stops any further descent.
Object *Parser::getObj(Object *obj, Guchar *fileKey,
                       CryptAlgorithm encAlgorithm, int keyLength,
                       int objNum, int objGen) {
  char *key;
  Stream *str;
  Object obj2;
  int num;
  DecryptStream *decrypt;
  GString *s, *s2;
  int c;
  const int recursionLimit = 1000;  // maximum nesting depth

  recursionCount++;  // entering one more level

  // refill buffer after inline image data
  if (inlineImg == 2) {
    buf1.free();
    buf2.free();
    lexer->getObj(&buf1);
    lexer->getObj(&buf2);
    inlineImg = 0;
  }

  // array
  if (buf1.isCmd("[") && recursionCount < recursionLimit) {
    shift();
    obj->initArray(xref);
    while (!buf1.isCmd("]") && !buf1.isEOF())
      obj->arrayAdd(getObj(&obj2, fileKey, encAlgorithm, keyLength,
                           objNum, objGen));
    if (buf1.isEOF())
      error(getPos(), "End of file inside array");
    shift();

  // dictionary or stream
  } else if (buf1.isCmd("<<") && recursionCount < recursionLimit) {
    shift();
    obj->initDict(xref);
    while (!buf1.isCmd(">>") && !buf1.isEOF()) {
      if (!buf1.isName()) {
        error(getPos(), "Dictionary key must be a name object");
        shift();
      } else {
        key = copyString(buf1.getName());
        shift();
        if (buf1.isEOF() || buf1.isError()) {
          gfree(key);
          break;
        }
        obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength,
                                 objNum, objGen));
      }
    }
    if (buf1.isEOF())
      error(getPos(), "End of file inside dictionary");
    // stream objects are not allowed inside content streams or
    // object streams
    if (allowStreams && buf2.isCmd("stream")) {
      if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
                            objNum, objGen))) {
        obj->initStream(str);
      } else {
        obj->free();
        obj->initError();
      }
    } else {
      shift();
    }

  // indirect reference or integer
  } else if (buf1.isInt()) {
    num = buf1.getInt();
    shift();
    if (buf1.isInt() && buf2.isCmd("R")) {
      obj->initRef(num, buf1.getInt());
      shift();
      shift();
    } else {
      obj->initInt(num);
    }

  // string
  } else if (buf1.isString() && fileKey) {
    s = buf1.getString();
    s2 = new GString();
    obj2.initNull();
    decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
                                              s->getLength(), &obj2),
                                fileKey, encAlgorithm, keyLength,
                                objNum, objGen);
    decrypt->reset();
    while ((c = decrypt->getChar()) != EOF) {
      s2->append((char)c);
    }
    delete decrypt;
    obj->initString(s2);
    shift();

  // simple object (also reached by "[" / "<<" once the depth limit is hit)
  } else {
    buf1.copy(obj);
    shift();
  }

  // Leaving this level.  This is the function's only exit, so every
  // increment above is matched by exactly one decrement.
  recursionCount--;
  return obj;
}
# Rebuild xpdf 3.02 with the AFL++ compiler wrappers and reinstall it.

# Remove the previous install tree. ${HOME:?} makes the command fail instead
# of expanding to "/fuzzing_xpdf/install" when HOME is unset or empty.
rm -r -- "${HOME:?}/fuzzing_xpdf/install"
cd -- "$HOME/fuzzing_xpdf/xpdf-3.02/"
make clean
# NOTE(review): presumably selects the LLVM 11 toolchain for the AFL++ build;
# confirm it is still required at this step.
export LLVM_CONFIG="llvm-config-11"
# Configure with afl-clang-fast / afl-clang-fast++ as the C and C++ compilers.
CC="$HOME/AFLplusplus/afl-clang-fast" CXX="$HOME/AFLplusplus/afl-clang-fast++" ./configure --prefix="$HOME/fuzzing_xpdf/install/"
make
make install
❯ gdb-gef --args ~/fuzzing_xpdf/install/bin/pdftotext ./crash1
Reading symbols from /home/ion/fuzzing_xpdf/install/bin/pdftotext...
Error while writing index for `/home/ion/fuzzing_xpdf/install/bin/pdftotext': mkstemp: No such file or directory.
GEF for linux ready, type `gef' to start, `gef config' to configure
93 commands loaded and 5 functions added for GDB 12.1 in 0.00ms using Python engine 3.10
gef➤ r
.
.
.
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error (485): Bad 'Length' attribute in stream
Error: font resource is not a dictionary
Error: Weird page contents
[Inferior 1 (process 239000) exited normally]