Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1429298
PdfHandler.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
PdfHandler.php
View Options
<?php
namespace
MediaWiki\Extension\PdfHandler
;
use
File
;
use
ImageHandler
;
use
MediaTransformError
;
use
MediaTransformOutput
;
use
MediaWiki\Context\IContextSource
;
use
MediaWiki\MediaWikiServices
;
use
MediaWiki\PoolCounter\PoolCounterWorkViaCallback
;
use
ThumbnailImage
;
use
TransformParameterError
;
/**
* Copyright © 2007 Martin Seidel (Xarax) <jodeldi@gmx.de>
*
* Inspired by djvuhandler from Tim Starling
* Modified and written by Xarax
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
class PdfHandler extends ImageHandler {
	/**
	 * Message keys handed out by getWarningConfig().
	 *
	 * Keep in sync with pdfhandler.messages in extension.json
	 *
	 * @see getWarningConfig
	 */
	private const MESSAGES = [
		'main' => 'pdf-file-page-warning',
		'header' => 'pdf-file-page-warning-header',
		'info' => 'pdf-file-page-warning-info',
		'footer' => 'pdf-file-page-warning-footer',
	];

	/**
	 * Size threshold in bytes: 10MB is considered a large file.
	 * Files at or above it fetch their local copy through a pool counter.
	 */
	private const LARGE_FILE = 1e7;

	/**
	 * Key for getHandlerState for value of type PdfImage
	 */
	private const STATE_PDF_IMAGE = 'pdfImage';

	/**
	 * Key for getHandlerState for dimension info
	 */
	private const STATE_DIMENSION_INFO = 'pdfDimensionInfo';

	/**
	 * PDFs always have to be rendered to be displayed.
	 *
	 * @param File $file
	 * @return bool Always true
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * PDFs are always treated as multi-page media.
	 *
	 * @param File $file
	 * @return bool Always true
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Check a single transform parameter.
	 *
	 * Only 'width', 'height' and 'page' are accepted, and each must be
	 * greater than zero. 'page' must additionally be a plain integer.
	 *
	 * @param string $name
	 * @param string $value
	 * @return bool True if the parameter is known and its value is acceptable
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
			return false;
		}

		// Strict comparison: parameter names are strings, so no type juggling is wanted.
		if ( in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
			return ( $value > 0 );
		}

		return false;
	}

	/**
	 * Build the "page<N>-<W>px" string identifying a thumbnail.
	 *
	 * @param array $params Transform parameters; 'page' defaults to 1
	 * @return bool|string False if no width is given, the parameter string otherwise
	 */
	public function makeParamString( $params ) {
		if ( !isset( $params['width'] ) ) {
			return false;
		}

		$page = $params['page'] ?? 1;

		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString(): split "page<N>-<W>px" into its parts.
	 *
	 * @param string $str
	 * @return array|bool Array with 'width' and 'page' (as matched strings),
	 *  or false if $str does not have the expected format
	 */
	public function parseParamString( $str ) {
		$m = [];
		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * Reduce the transform parameters to the two that identify a thumbnail.
	 *
	 * NOTE(review): reads $params['page'] without a fallback, unlike
	 * makeParamString() — presumably callers pass normalised params; confirm.
	 *
	 * @param array $params Must contain 'width' and 'page'
	 * @return array
	 */
	public function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			'page' => $params['page'],
		];
	}

	/**
	 * Map magic word IDs to the parameter names used by this handler.
	 *
	 * @return array
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Create a thumbnail error whose text is the given message rendered
	 * in the content language.
	 *
	 * @param int $width
	 * @param int $height
	 * @param string $msg Message key
	 * @return MediaTransformError
	 */
	protected function doThumbError( $width, $height, $msg ) {
		$text = wfMessage( $msg )->inContentLanguage()->text();

		return new MediaTransformError( 'thumbnail_error', $width, $height, $text );
	}

	/**
	 * Render one page of the PDF into a thumbnail file.
	 *
	 * The page is rasterised to JPEG by $wgPdfProcessor and piped into
	 * $wgPdfPostProcessor, which resizes it and writes it to $dstPath.
	 *
	 * @param File $image
	 * @param string $dstPath Filesystem path the thumbnail is written to
	 * @param string $dstUrl URL the thumbnail will be served from
	 * @param array $params Transform parameters, normalised in place
	 * @param int $flags Bitfield; self::TRANSFORM_LATER skips the rendering
	 * @return MediaTransformError|MediaTransformOutput|ThumbnailImage|TransformParameterError
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}

		$width = (int)$params['width'];
		$height = (int)$params['height'];
		$page = (int)$params['page'];

		if ( $page > $this->pageCount( $image ) ) {
			return $this->doThumbError( $width, $height, 'pdf_page_error' );
		}

		// Shared by the deferred and the rendered thumbnail object.
		$thumbParams = [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		];

		if ( $flags & self::TRANSFORM_LATER ) {
			// Rendering is deferred, so there is no local path yet.
			return new ThumbnailImage( $image, $dstUrl, false, $thumbParams );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
		}

		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= self::LARGE_FILE ) {
			$work = new PoolCounterWorkViaCallback(
				'GetLocalFileCopy',
				sha1( $image->getName() ),
				[
					'doWork' => static function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) {
			// could not download original
			return $this->doThumbError( $width, $height, 'filemissing' );
		}

		// Stage 1: rasterise the requested page to JPEG on stdout.
		$cmd = '(' . wfEscapeShellArg(
			$wgPdfProcessor,
			"-sDEVICE=jpeg",
			"-sOutputFile=-",
			"-sstdout=%stderr",
			"-dFirstPage={$page}",
			"-dLastPage={$page}",
			"-dSAFER",
			"-r{$wgPdfHandlerDpi}",
			// CropBox defines the region that the PDF viewer application is expected to display or print.
			"-dUseCropBox",
			"-dBATCH",
			"-dNOPAUSE",
			"-q",
			$srcPath
		);
		// Stage 2: read the JPEG from stdin, resize it and write it to $dstPath.
		$cmd .= " | " . wfEscapeShellArg(
			$wgPdfPostProcessor,
			"-depth",
			"8",
			"-quality",
			$wgPdfHandlerJpegQuality,
			"-resize",
			(string)$width,
			"-",
			$dstPath
		);
		$cmd .= ")";

		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExecWithStderr( $cmd, $retval );

		// Treat both a non-zero exit status and a removed output file as failure.
		$removed = $this->removeBadFile( $dstPath, $retval );

		if ( $retval != 0 || $removed ) {
			wfDebugLog(
				'thumbnail',
				sprintf(
					'thumbnail failed on %s: error %d "%s" from "%s"',
					wfHostname(),
					$retval,
					trim( $err ),
					$cmd
				)
			);

			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	/**
	 * Get the PdfImage for a path, creating it once and remembering it
	 * in the given handler state.
	 *
	 * @param \MediaHandlerState $state
	 * @param string $path
	 * @return PdfImage
	 */
	private function getPdfImage( $state, $path ) {
		$pdfImg = $state->getHandlerState( self::STATE_PDF_IMAGE );
		if ( !$pdfImg ) {
			$pdfImg = new PdfImage( $path );
			$state->setHandlerState( self::STATE_PDF_IMAGE, $pdfImg );
		}

		return $pdfImg;
	}

	/**
	 * Extract the metadata of the file and, if available, the size of
	 * its first page.
	 *
	 * @param \MediaHandlerState $state
	 * @param string $path
	 * @return array|bool Array with a 'metadata' key, plus whatever
	 *  PdfImage::getPageSize() returned for page 1 when that is truthy
	 */
	public function getSizeAndMetadata( $state, $path ) {
		$metadata = $this->getPdfImage( $state, $path )->retrieveMetaData();
		$result = [ 'metadata' => $metadata ];

		$sizes = PdfImage::getPageSize( $metadata, 1 );
		if ( $sizes ) {
			// Array union: keys already present in $sizes take precedence.
			return $sizes + $result;
		}

		return $result;
	}

	/**
	 * Get the file extension and MIME type used for thumbnails.
	 *
	 * All three arguments are ignored; the result depends only on
	 * $wgPdfOutputExtension. The MIME lookup is done once per request.
	 *
	 * @param string $ext
	 * @param string $mime
	 * @param array|null $params
	 * @return array [ extension, MIME type ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgPdfOutputExtension;
		// Named differently from the $mime parameter so the cache does not shadow it.
		static $outputMime;

		if ( !isset( $outputMime ) ) {
			$magic = MediaWikiServices::getInstance()->getMimeAnalyzer();
			$outputMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
		}

		return [ $wgPdfOutputExtension, $outputMime ];
	}

	/**
	 * Classify the stored metadata of a file.
	 *
	 * @param File $file
	 * @return bool|int self::METADATA_BAD when 'pages' is missing,
	 *  self::METADATA_COMPATIBLE when only 'mergedMetadata' is missing,
	 *  self::METADATA_GOOD otherwise
	 */
	public function isFileMetadataValid( $file ) {
		$data = $file->getMetadataItems( [ 'mergedMetadata', 'pages' ] );

		if ( !isset( $data['pages'] ) ) {
			return self::METADATA_BAD;
		}

		if ( !isset( $data['mergedMetadata'] ) ) {
			return self::METADATA_COMPATIBLE;
		}

		return self::METADATA_GOOD;
	}

	/**
	 * Format the merged metadata of a file for display.
	 *
	 * @param File $image
	 * @param bool|IContextSource $context Context to use (optional)
	 * @return bool|array False if there is no merged metadata to show
	 */
	public function formatMetadata( $image, $context = false ) {
		$mergedMetadata = $image->getMetadataItem( 'mergedMetadata' );

		if ( !is_array( $mergedMetadata ) || $mergedMetadata === [] ) {
			return false;
		}

		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $mergedMetadata, $context );
	}

	/** @inheritDoc */
	protected function formatTag( string $key, $vals, $context = false ) {
		switch ( $key ) {
			case 'pdf-Producer':
			case 'pdf-Version':
				return htmlspecialchars( $vals );

			case 'pdf-PageSize':
				// Escape each entry in place; writing by key avoids leaving a
				// dangling reference behind, as a by-reference foreach would.
				foreach ( $vals as $idx => $val ) {
					$vals[$idx] = htmlspecialchars( $val );
				}
				return $vals;

			case 'pdf-Encrypted':
				// @todo: The value isn't i18n-ised; should be done here.
				// For reference, if encrypted this field's value looks like:
				// "yes (print:yes copy:no change:no addNotes:no)"
				return htmlspecialchars( $vals );

			default:
				break;
		}

		// Use default formatting
		return false;
	}

	/**
	 * Get the number of pages of the file.
	 *
	 * @param File $image
	 * @return bool|int False if no dimension info is available
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );
		if ( !$info ) {
			return false;
		}

		return $info['pageCount'];
	}

	/**
	 * Get the dimensions of one page.
	 *
	 * @param File $image
	 * @param int $page 1-based page number
	 * @return array|bool False if the page or the dimension info is unknown
	 */
	public function getPageDimensions( File $image, $page ) {
		$info = $this->getDimensionInfo( $image );

		// MW starts pages at 1, as they are stored here
		if ( $info && isset( $info['dimensionsByPage'][$page] ) ) {
			return $info['dimensionsByPage'][$page];
		}

		return false;
	}

	/**
	 * Get the page count and per-page dimensions of a file.
	 *
	 * The result is looked up in the file's handler state first, then in
	 * the WAN object cache (keyed by the file's SHA-1), and only computed
	 * from the file metadata if both miss.
	 *
	 * @param File $file
	 * @return bool|mixed Array with 'pageCount' and 'dimensionsByPage',
	 *  or false if the metadata has no 'Pages' entry
	 */
	protected function getDimensionInfo( File $file ) {
		$info = $file->getHandlerState( self::STATE_DIMENSION_INFO );
		if ( $info ) {
			return $info;
		}

		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$info = $cache->getWithSetCallback(
			$cache->makeKey( 'file-pdf-dimensions', $file->getSha1() ),
			$cache::TTL_MONTH,
			static function () use ( $file ) {
				$data = $file->getMetadataItems( PdfImage::ITEMS_FOR_PAGE_SIZE );
				if ( !$data || !isset( $data['Pages'] ) ) {
					return false;
				}

				$count = intval( $data['Pages'] );
				$dimsByPage = [];
				// Pages are numbered from 1.
				for ( $i = 1; $i <= $count; $i++ ) {
					$dimsByPage[$i] = PdfImage::getPageSize( $data, $i );
				}

				return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
			}
		);

		// Only written on a state miss; a hit already holds this value.
		$file->setHandlerState( self::STATE_DIMENSION_INFO, $info );

		return $info;
	}

	/**
	 * Get the extracted text of one page.
	 *
	 * @param File $image
	 * @param int $page 1-based page number; the 'text' metadata item is 0-based
	 * @return string|bool False if no text is stored for that page
	 */
	public function getPageText( File $image, $page ) {
		$pageTexts = $image->getMetadataItem( 'text' );
		$index = $page - 1;

		if ( !is_array( $pageTexts ) || !isset( $pageTexts[$index] ) ) {
			return false;
		}

		return $pageTexts[$index];
	}

	/**
	 * Adds a warning about PDFs being potentially dangerous to the file
	 * page. Multiple messages with this base will be used.
	 *
	 * @param File $file
	 * @return array Array with 'messages', 'link' and 'module' keys
	 */
	public function getWarningConfig( $file ) {
		return [
			'messages' => self::MESSAGES,
			'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
			'module' => 'pdfhandler.messages',
		];
	}

	/**
	 * Report that this handler uses split metadata.
	 *
	 * @return bool Always true
	 */
	public function useSplitMetadata() {
		return true;
	}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 17:23 (9 h, 49 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
49/a5/8d77f991548a065f9da5f08bdf73
Default Alt Text
PdfHandler.php (11 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment