94 lines
3.7 KiB
Text
94 lines
3.7 KiB
Text
|
|
#------------------------------------------------------------------------------
|
|
# $File: rtf,v 1.9 2020/12/12 20:01:47 christos Exp $
|
|
# rtf: file(1) magic for Rich Text Format (RTF)
|
|
#
|
|
# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
|
|
# Update: Joerg Jenderek
|
|
# URL: https://en.wikipedia.org/wiki/Rich_Text_Format
|
|
# Reference: http://www.snake.net/software/RTF/RTF-Spec-1.7.rtf
|
|
# http://www.kleinlercher.at/tools/Windows_Protocols/Word2007RTFSpec9.pdf
|
|
# Top-level signature: the data must start with the literal text {\rtf
# (the backslash is written doubled in the pattern below).
0 string {\\rtf
|
|
# skip DROID fmt-355-signature-id-522.rtf by looking for valid version
# (reject the sample whose byte after the keyword, at offset 5, is 0xAB)
|
|
>5 ubyte !0xAB
|
|
# skip also \ in DROID fmt-50-signature-id-158.rtf by looking for valid version
# (0x5C is ASCII '\'; the description is only printed once both checks pass)
|
|
>>5 ubyte !0x5C Rich Text Format data
|
|
!:mime text/rtf
|
|
!:apple ????RTF
|
|
!:ext rtf
|
|
# hand over to the shared sub-test for version, character set and language
>>>0 use rtf-info
|
|
# rtf-info: named sub-test invoked with "use rtf-info" by the signature entries
# of this file; displays information like version, character set, code page
# and default language of RTF
|
|
0 name rtf-info
|
|
# 1 mostly, 2 for newer Pocket Word documents, space for test like fdo78502.rtf, { for some urtf
# (0x7b is ASCII '{'; in that case no version is printed)
|
|
>5 ubyte !0x7b \b, version %c
|
|
# The word for character set must precede any text or most other control words
|
|
>6 string \\mac \b, Apple Macintosh
|
|
>6 string \\pc
|
|
# control word \pca: the byte after \pc is 0x61 (ASCII 'a')
|
|
>>9 ubyte =0x61 \b, IBM PS/2, code page 850
|
|
# plain control word \pc: any other byte follows
>>9 ubyte !0x61 \b, IBM PC, code page 437
|
|
# unknown character set, or \ansi found later, after control words like
|
|
# \adeflang1025 \info \title \author \category \manager
|
|
# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
|
|
# (earlier variant with a shorter search range)
#>6 search/105 \\ansi \b, ANSI
|
|
>6 search/502 \\ansi \b, ANSI
|
|
# fallback for this level when none of the character set tests matched
>6 default x \b, unknown character set
|
|
# look for explicit codepage keyword
|
|
# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
|
|
# (earlier variant with a shorter search range)
#>5 search/110 \\ansicpg
|
|
>5 search/500 \\ansicpg
|
|
# skip unknown or buggy codepage string 0 like in fdo78502.rtf
# (0x30 is ASCII '0', tested on the first byte after the keyword)
|
|
>>&0 ubyte !0x30 \b, code page
|
|
# codepage string: 437~United States IBM, ..., 1252~WesternEuropean, ..., 57011~Punjabi
# step back one byte to the first digit and print the first 3 characters
|
|
>>>&-1 string x %-.3s
|
|
# skip space or \ and display possible 4th digit of code page string
# (greater than 0x2F and less than 0x3A together select the ASCII digits 0-9)
|
|
>>>&2 ubyte >0x2F
|
|
>>>>&-1 ubyte <0x3A \b%c
|
|
# possible 5th digit of code page string (same ASCII digit range check)
|
|
>>>>>&0 ubyte >0x2F
|
|
>>>>>>&-1 ubyte <0x3A \b%c
|
|
# look again at version byte to use default clause
# (test x prints nothing itself; it opens a level for the language tests below)
|
|
>5 ubyte x
|
|
# Default language ID for South Asian/Middle Eastern text
|
|
# language ID: 1025, ..., 1065~Persian, ..., 2057~English_UnitedKingdom, ..., 58380~French_NorthAfrica
|
|
# Readme-0.72-Persian.rtf
|
|
# (earlier variant with a shorter search range)
#>6 search/1 \\adeflang \b, default middle east language ID
|
|
>>6 search/497 \\adeflang \b, default middle east language ID
|
|
# https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/6c085406-a698-4e12-9d4d-c3b0ee3dbc4a
# print up to 4 characters of the language ID that follows the keyword
|
|
>>>&0 string x %.4s
|
|
# skip \ and NL and show possible 5th digit of language string
# (greater than 0x2F and less than 0x3A together select the ASCII digits 0-9)
|
|
>>>&4 ubyte >0x2F
|
|
>>>>&-1 ubyte <0x3A \b%c
|
|
# else look for default language to be used when the \plain control word is encountered
# (reached only when the \adeflang search above did not match)
|
|
>>6 default x
|
|
# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
|
|
# (earlier variant with a shorter search range)
#>>>6 search/127 \\deflang
|
|
>>>6 search/505 \\deflang
|
|
# print up to 4 characters of the language ID, only if it is a non-empty string
>>>>&0 string >0 \b, default language ID %-.4s
|
|
# possible 5th digit of language string (same ASCII digit range check)
|
|
>>>>&4 ubyte >0x2F
|
|
>>>>>&-1 ubyte <0x3A \b%c
|
|
|
|
# Reference: http://latex2rtf.sourceforge.net/rtfspec_63.html
|
|
# Note: no real world example found
|
|
# Signature for the {\urtf variant; it shares the MIME type and extension of
# plain RTF, while the Apple type/creator line is left disabled.
0 string {\\urtf Rich Text Format unicoded data
|
|
!:mime text/rtf
|
|
#!:apple ????RTF
|
|
!:ext rtf
|
|
# NOTE(review): rtf-info is called at offset 1 rather than 0, presumably because
# {\urtf is one byte longer than {\rtf and the sub-test uses fixed offsets
# - confirm against magic(5) "use" semantics
>1 use rtf-info
|
|
|
|
# URL: https://en.wikipedia.org/wiki/Microsoft_Word
|
|
# Reference: http://fileformats.archiveteam.org/wiki/Microsoft_Word
|
|
# Note: called by TrID "Pocket Word document"
|
|
# by PlanMaker "Pocket Word-Handheld PC" for pwd
|
|
# by PlanMaker "Pocket Word-Pocket PC" for psw
|
|
# Signature for Pocket Word files, which start with the literal text {\pwd
0 string {\\pwd Pocket Word document or template
|
|
# by SoftMaker Office http://extension.nirsoft.net/pwd
|
|
# (alternative MIME type, disabled in favour of the one below)
#!:mime application/msword
|
|
# https://reposcope.com/mimetype/application/x-pocket-word
|
|
!:mime application/x-pocket-word
|
|
# PWD for Handheld PC variant and PSW for Pocket PC variant
|
|
# PWT for template
|
|
!:ext pwd/psw/pwt
|
|
# reuse the shared sub-test; {\pwd has the same length as {\rtf, so offset 0 is used
>0 use rtf-info
|
|
|