A PHP library for low level access of PDF files

SetaPDF-Core

Access PDF documents at their lowest level with PHP

SetaPDF-Core - Detect colors in PDF files

With the Core system it is possible to walk through a PDF document at its lowest level and analyse its internal structure. In this demo we show you how to collect information about the colors and color spaces used in a PDF document with PHP.

The demo makes use of two individual classes:

StreamProcessor.php

PHP
<?php
/**
 * Class StreamProcessor
 *
 * This class offers the callback methods for the content stream parser. Each
 * callback reports the colors or color spaces it encounters to the
 * ColorInspector instance that was passed to the constructor.
 */
class StreamProcessor
{
    /**
     * The inspector which collects all found color definitions
     *
     * @var \ColorInspector
     */
    protected $_colorInspector;

    /**
     * The canvas whose content stream and resources are processed
     *
     * @var \SetaPDF_Core_Canvas
     */
    protected $_canvas;

    /**
     * The content stream parser (created in process())
     *
     * @var \SetaPDF_Core_Parser_Content
     */
    protected $_parser;

    /**
     * The constructor
     *
     * @param \SetaPDF_Core_Canvas $canvas
     * @param \ColorInspector $colorInspector
     */
    public function __construct(\SetaPDF_Core_Canvas $canvas, \ColorInspector $colorInspector)
    {
        $this->_canvas = $canvas;
        $this->_colorInspector = $colorInspector;
    }

    /**
     * Callback for standard color operators
     *
     * Creates a color object from the operands and registers it under the
     * matching device color space. Colors of any other type are ignored.
     *
     * @param array $args
     * @param string $operator
     */
    public function _color(array $args, $operator)
    {
        $color = \SetaPDF_Core_DataStructure_Color::createByComponents($args);

        if ($color instanceof \SetaPDF_Core_DataStructure_Color_Rgb) {
            $colorSpace = 'DeviceRGB';
        } elseif ($color instanceof \SetaPDF_Core_DataStructure_Color_Gray) {
            $colorSpace = 'DeviceGray';
        } elseif ($color instanceof \SetaPDF_Core_DataStructure_Color_Cmyk) {
            $colorSpace = 'DeviceCMYK';
        } else {
            return;
        }

        $info = 'Standard color operator (' . $operator . ') in content stream.';
        $this->_colorInspector->addFoundColor($colorSpace, $color, $info);
    }

    /**
     * Callback for color space operators
     *
     * The operand is a name. If the color space resources of the canvas hold
     * an entry with that name, the entry is used as the definition; otherwise
     * the name itself is used.
     *
     * @param array $args
     * @param string $operator
     */
    public function _colorSpace(array $args, $operator)
    {
        // an operator without operand cannot be resolved
        if (!isset($args[0])) {
            return;
        }

        $definition = $args[0];
        $colorSpaces = $this->_canvas->getResources(true, false, \SetaPDF_Core_Resource::TYPE_COLOR_SPACE);
        if ($colorSpaces && $colorSpaces->offsetExists($definition->getValue())) {
            $definition = $colorSpaces->getValue($definition->getValue());
        }

        $colorSpace = \SetaPDF_Core_ColorSpace::createByDefinition($definition);

        $info = 'Color space operator (' . $operator . ') in content stream.';
        $this->_resolveColorSpace($colorSpace, $info);
    }

    /**
     * Helper method to recursively resolve a color space and its alternate/base color spaces
     *
     * The color space itself is always registered. Depending on its type the
     * alternate or base color space is registered as well.
     *
     * @param \SetaPDF_Core_ColorSpace $colorSpace
     * @param string $info
     */
    protected function _resolveColorSpace(\SetaPDF_Core_ColorSpace $colorSpace, $info)
    {
        $this->_colorInspector->addFoundColor($colorSpace->getFamily(), $colorSpace, $info);

        switch (true) {
            case $colorSpace instanceof \SetaPDF_Core_ColorSpace_Separation:
                $this->_resolveColorSpace(
                    $colorSpace->getAlternateColorSpace(),
                    'Alternate color space for Separation color space.'
                );
                break;

            case $colorSpace instanceof \SetaPDF_Core_ColorSpace_DeviceN:
                $this->_resolveColorSpace(
                    $colorSpace->getAlternateColorSpace(),
                    'Alternate color space for DeviceN color space.'
                );
                break;

            case $colorSpace instanceof \SetaPDF_Core_ColorSpace_Indexed:
                $this->_resolveColorSpace(
                    $colorSpace->getBase(),
                    'Base color space for Indexed color space.'
                );
                break;

            case $colorSpace instanceof \SetaPDF_Core_ColorSpace_IccBased:
                $stream = $colorSpace->getIccProfileStream();
                $alternate = $stream->getAlternate();
                if ($alternate) {
                    $this->_resolveColorSpace(
                        $alternate,
                        'Alternate color space for ICC profile color space.'
                    );
                }

                /* See ICC.1:2010 - Table 19 (ICC1v43_2010-12.pdf)
                 */
                $signature = $stream->getParser()->getColorSpace();
                $this->_colorInspector->addFoundColor(
                    trim($signature),
                    $stream,
                    'Color space signature extracted from ICC profile.'
                );
                break;
        }
    }

    /**
     * Callback for painting a XObject
     *
     * Image XObjects report their color space (image masks are skipped).
     * Form XObjects report the color space of their transparency group and
     * are then processed by a new StreamProcessor instance.
     *
     * @param array $args
     */
    public function _paintXObject($args)
    {
        if (!isset($args[0])) {
            return;
        }

        $name = $args[0]->getValue();
        $xObjects = $this->_canvas->getResources(true, false, \SetaPDF_Core_Resource::TYPE_X_OBJECT);

        if (!$xObjects) {
            return;
        }

        $xObjectIndirectObject = $xObjects->getValue($name);
        if (!($xObjectIndirectObject instanceof \SetaPDF_Core_Type_IndirectReference)) {
            return;
        }

        $xObject = \SetaPDF_Core_XObject::get($xObjectIndirectObject);
        if ($xObject instanceof \SetaPDF_Core_XObject_Image) {
            $dict = $xObject->getIndirectObject()->ensure()->getValue();
            if ($dict->offsetExists('ImageMask') && $dict->getValue('ImageMask')->ensure()->getValue() == true) {
                return;
            }

            $colorSpace = $xObject->getColorSpace();
            $info = 'Color space of an image used in a content stream.';
            $this->_resolveColorSpace($colorSpace, $info);

        } elseif ($xObject instanceof \SetaPDF_Core_XObject_Form) {

            /* Get the colorspace from the transparency group */
            $group = $xObject->getGroup();
            if ($group instanceof \SetaPDF_Core_TransparencyGroup) {
                $colorSpace = $group->getColorSpace(true);
                if ($colorSpace !== null) {
                    $info = 'Color space from Transparency Group of XObject.';
                    $this->_resolveColorSpace(\SetaPDF_Core_ColorSpace::createByDefinition($colorSpace), $info);
                }
            }

            /* We got a Form XObject - start recursive processing
             *
             * NOTE(review): there is no cycle detection here. A form that
             * references itself would recurse without end - confirm whether
             * the library protects against this.
             */
            $streamProcessor = new self($xObject->getCanvas(), $this->_colorInspector);
            $streamProcessor->process();
        }
    }

    /**
     * Callback for inline image operator
     *
     * The operands are read as key/value pairs. The color space is taken from
     * the "CS" entry or, if that does not exist, from "ColorSpace".
     * Abbreviated names are expanded to their full names.
     *
     * @param array $args
     */
    public function _startInlineImageData($args)
    {
        $dict = new \SetaPDF_Core_Type_Dictionary();

        // only complete key/value pairs are used; a trailing key without value is ignored
        for ($i = 0, $c = count($args); $i + 1 < $c; $i += 2) {
            $dict[$args[$i]] = $args[$i + 1];
        }

        $colorSpace = $dict->offsetExists('CS') ? $dict->getValue('CS') : $dict->getValue('ColorSpace');
        if (null === $colorSpace) {
            return;
        }

        $colorSpace = $colorSpace->getValue();

        switch ($colorSpace) {
            case 'G':
                $colorSpace = 'DeviceGray';
                break;
            case 'RGB':
                $colorSpace = 'DeviceRGB';
                break;
            case 'CMYK':
                $colorSpace = 'DeviceCMYK';
                break;
            case 'I':
                $colorSpace = 'Indexed';
                break;
        }

        $info = 'Color space of an inline image in content stream.';
        $this->_colorInspector->addFoundColor($colorSpace, \SetaPDF_Core_ColorSpace::createByDefinition($colorSpace), $info);
    }

    /**
     * Callback for shading operator
     *
     * Looks up the named shading in the shading resources of the canvas and
     * reports its color space. The resource entry may be an indirect
     * reference or a direct dictionary. Entries that cannot be resolved to a
     * dictionary are ignored.
     *
     * @param array $args
     */
    public function _paintShapeAndColourShading($args)
    {
        if (!isset($args[0])) {
            return;
        }

        $name = $args[0]->getValue();
        $shadings = $this->_canvas->getResources(true, false, \SetaPDF_Core_Resource::TYPE_SHADING);

        if (!$shadings) {
            return;
        }

        $shadingValue = $shadings->getValue($name);
        if (!is_object($shadingValue)) {
            return;
        }

        try {
            // resolves an indirect reference; a direct value is expected to return itself
            $shading = $shadingValue->ensure();
        } catch (\SetaPDF_Core_Type_IndirectReference_Exception $e) {
            return;
        }

        if ($shading instanceof \SetaPDF_Core_Type_Stream) {
            $shading = $shading->getValue();
        }

        if (!($shading instanceof \SetaPDF_Core_Type_Dictionary)) {
            return;
        }

        $colorSpaceValue = $shading->getValue('ColorSpace');
        if ($colorSpaceValue === null) {
            return;
        }

        $colorSpace = \SetaPDF_Core_ColorSpace::createByDefinition($colorSpaceValue);
        $info = 'Paint shading operator in content stream.';
        $this->_resolveColorSpace($colorSpace, $info);
    }

    /**
     * Process the content stream
     *
     * Registers the callbacks of this class on a new content stream parser
     * and runs it over the stream of the canvas.
     */
    public function process()
    {
        try {
            $stream = $this->_canvas->getStream();
        } catch (\SetaPDF_Core_Filter_Exception $e) {
            // if a stream cannot be unfiltered, we ignore it
            return;
        }

        $this->_parser = new \SetaPDF_Core_Parser_Content($stream);

        /* Register colorspace operators
         * e.g. -> /DeviceRGB CS   % Set DeviceRGB colour space
         */
        $this->_parser->registerOperator(
            ['CS', 'cs'],
            [$this, '_colorSpace']
        );

        /* Register default color space operators */
        $this->_parser->registerOperator(
            ['G', 'g', 'RG', 'rg', 'K', 'k'],
            [$this, '_color']
        );

        /* Register draw operator for XObjects */
        $this->_parser->registerOperator('Do', [$this, '_paintXObject']);

        /* Inline image */
        $this->_parser->registerOperator('ID', [$this, '_startInlineImageData']);

        /* Shading Operator */
        $this->_parser->registerOperator('sh', [$this, '_paintShapeAndColourShading']);

        $this->_parser->process();
    }
}

ColorInspector.php

PHP
<?php
/**
 * Class ColorInspector
 *
 * Walks through the pages of a document (and optionally through the appearance
 * streams of their annotations), runs a StreamProcessor on every canvas and
 * collects the color definitions reported through addFoundColor().
 */
class ColorInspector
{
    /**
     * The document to inspect
     *
     * @var \SetaPDF_Core_Document
     */
    protected $_document;

    /**
     * All found color definitions
     *
     * @var array
     */
    protected $_colors = [];

    /**
     * Information about the currently processed "location"
     *
     * @var string
     */
    protected $_currentLocation;

    /**
     * The constructor
     *
     * @param \SetaPDF_Core_Document $document
     */
    public function __construct(\SetaPDF_Core_Document $document)
    {
        $this->_document = $document;
    }

    /**
     * Get all used colors
     *
     * Every call starts with an empty result list, so calling the method
     * repeatedly does not duplicate entries.
     *
     * @param bool $processAnnotations Set to false to ignore color definitions in annotation appearance streams
     * @param null|int $maxPages The maximum of pages to process
     * @return array A list of arrays with the keys "colorSpace", "data", "info" and "location"
     */
    public function getColors($processAnnotations = true, $maxPages = null)
    {
        $this->_colors = [];

        $pages = $this->_document->getCatalog()->getPages();

        $pageCount = $pages->count();
        $maxPages = $maxPages === null ? $pageCount : min($maxPages, $pageCount);

        for ($pageNo = 1; $pageNo <= $maxPages; $pageNo++) {
            $this->_currentLocation = 'Page ' . $pageNo;

            $page = $pages->getPage($pageNo);
            $this->_processCanvas($page->getCanvas());

            if ($processAnnotations) {
                $this->_processAnnotations($page, $pageNo);
            }
        }

        return $this->_colors;
    }

    /**
     * Runs a stream processor on a canvas; found colors are reported back to this instance.
     *
     * @param \SetaPDF_Core_Canvas $canvas
     */
    protected function _processCanvas($canvas)
    {
        $streamProcessor = new \StreamProcessor($canvas, $this);
        $streamProcessor->process();
    }

    /**
     * Processes all appearance streams of the annotations of a page.
     *
     * The AP entry and its values are resolved before their type is checked,
     * because each of them may be stored as an indirect reference.
     *
     * @param mixed $page The page object as returned by the pages helper
     * @param int $pageNo The page number (used for the location information)
     */
    protected function _processAnnotations($page, $pageNo)
    {
        foreach ($page->getAnnotations()->getAll() as $annotation) {
            $dict = $annotation->getDictionary();

            $ap = $this->_resolve($dict->getValue('AP'));
            if (!($ap instanceof \SetaPDF_Core_Type_Dictionary)) {
                continue;
            }

            // a missing Subtype must not stop the inspection
            $subtype = $dict->getValue('Subtype');
            $subtypeName = $subtype !== null ? $subtype->getValue() : 'unknown';
            $this->_currentLocation = 'Annotation (' . $subtypeName . ') on Page ' . $pageNo;

            foreach ($ap as $type => $value) {
                $object = $this->_resolve($value);
                if ($object instanceof \SetaPDF_Core_Type_Stream) {
                    $this->_processCanvas($annotation->getAppearance($type)->getCanvas());

                } elseif ($object instanceof \SetaPDF_Core_Type_Dictionary) {
                    // a sub-dictionary holds one appearance stream per state
                    foreach ($object as $subType => $subValue) {
                        if ($this->_resolve($subValue) instanceof \SetaPDF_Core_Type_Stream) {
                            $this->_processCanvas($annotation->getAppearance($type, $subType)->getCanvas());
                        }
                    }
                }
            }
        }
    }

    /**
     * Resolves a value through its ensure() method.
     *
     * @param mixed $value
     * @return mixed|null The resolved object or null if there is no value or the reference cannot be resolved
     */
    protected function _resolve($value)
    {
        if (!is_object($value)) {
            return null;
        }

        try {
            return $value->ensure();
        } catch (\SetaPDF_Core_Type_IndirectReference_Exception $e) {
            // a broken reference is skipped, as it is done for shadings in the stream processor
            return null;
        }
    }

    /**
     * A method which will register found color definitions.
     *
     * The entry is stored together with the currently processed location.
     *
     * @param string $colorSpace The name of the color space
     * @param mixed $data The object the color space was found in/created from
     * @param null|string $info A description of how the color was found
     */
    public function addFoundColor($colorSpace, $data = null, $info = null)
    {
        $this->_colors[] = [
            'colorSpace' => $colorSpace,
            'data' => $data,
            'info' => $info,
            'location' => $this->_currentLocation,
        ];
    }
}

Try it!

Select or upload a file

The uploaded files are bound to your browser session and are not accessible by any other user. They will get deleted after 24 hours automatically.

Loading...