Monday, November 28, 2011

Use Powershell to configure PDF Search in Sharepoint 2010

The following PowerShell script automates the task of configuring PDF search in your SharePoint 2010 farm. The script downloads a PDF icon from Adobe and makes it available to the SharePoint farm by updating the DOCICON.XML file. It also downloads and installs the Adobe iFilter for PDF. Finally, it adds "pdf" to the list of search extensions of your Search Application. This script should be run on the back-end indexing server.


This script adds the functionality to crawl and index content of pdf files in SharePoint 2010
The script will:

  1. Download and install iFilter 9 x64 from Adobe;
  2. Download and install the pdf icon from Adobe (including adding the pdf icon file to the docicons.xml file);
  3. Add the pdf extension to the list of documents to be indexed to the Sharepoint 2010 Search Application;
  4. Register pdf filter for SharePoint Search in the registry;
  5. Restart the SP search service and IIS.
  1. The script should be run on all indexing servers in the sharepoint farm to install the iFilter;
  2. The account that runs this script needs to be SP Farm administrator and to have administrator privileges on all servers in the farm;
  3. The script is tested and works fine on Windows Server 2008 R2 64-bit.
Source information
http://itbloggen.se/cs/blogs/josko/archive/2011/02/02/use-powershell-to-configure-pdf-search-in-sharepoint-2010.aspx



Below is the PowerShell code. You can copy and paste the text between the lines into a file and name it, for example, Configure-pdf-search.ps1.


--------------------------------------------------------------------------------

# This script adds the functionality to crawl and index content of pdf files in SharePoint 2010


# The script will:
# - download and install iFilter 9 x64 from Adobe
# - download and install the pdf icon from Adobe (including adding the pdf icon file to the docicons.xml file)
# - add the pdf extension to the list of documents to be indexed to the Sharepoint 2010 Search Application
# - register pdf filter for SharePoint Search in the registry
# - restart the SP search service and IIS
#
# The script should be run on all indexing servers in the sharepoint farm to install the iFilter
# The account that runs this script needs to be SP Farm administrator and to have administrator privileges on all servers in the farm
# The script is tested and works fine on Windows Server 2008 R2 64-bit


# configuration
# Working folder used for every downloaded and unpacked file.
$tempfolder = "C:\temp"

# Create the temp folder if it does not exist.
# Test-Path with -PathType Container only reports true for a directory, so a
# plain file that happens to have this name is not mistaken for the folder.
if (-not (Test-Path -Path $tempfolder -PathType Container)) {
    New-Item -ItemType Directory -Path $tempfolder -ErrorVariable err -ErrorAction "SilentlyContinue" | Out-Null
    # Report the failure instead of hiding it: every later step writes into this folder.
    if ([String]::IsNullOrEmpty($err) -eq $false) {
        Write-Error "Cannot create temp folder ${tempfolder}: $err"
    }
}
# Leave $err empty so later checks of $err do not see a stale error from this step.
$err = ""


function RestartIIS(){
    # Asks the operator whether the local IIS should be restarted.
    # "Yes" is the default choice (index 0); only that answer runs "iisreset /noforce".
    $choices = [System.Management.Automation.Host.ChoiceDescription[]](
        (New-Object System.Management.Automation.Host.ChoiceDescription "&Yes","Restarts IIS."),
        (New-Object System.Management.Automation.Host.ChoiceDescription "&No","Silently continues...")
    )
    $answer = $host.ui.PromptForChoice("Restart IIS", "Do you want to restart the local IIS?", $choices, 0)

    # Anything other than the first choice means: leave IIS alone.
    if ($answer -ne 0){ return }
    iisreset /noforce
}

function download{
    # usage: download http://url c:\temp\file.ext
    #
    # Downloads $URL to the file $destination using System.Net.WebClient.
    # Writes progress text to the output stream; on failure writes a
    # (non-terminating) error that includes the underlying exception message.
param([string]$URL, [string]$destination)
    Write-Output ""
    Write-Output "Downloading $URL ..."
    $clnt = $null
    try {
        $clnt = New-Object System.Net.WebClient
        # DownloadFile is a .NET method: its failures surface as exceptions,
        # not through -ErrorVariable, so they have to be caught here.
        $clnt.DownloadFile($URL, $destination)
        Write-Output " - Download completed."
    }
    catch {
        Write-Error "Download ERROR - Check URL: $_"
    }
    finally {
        # Release the client's resources whether or not the download worked.
        if ($clnt -ne $null) { $clnt.Dispose() }
    }
}

function Extract-Zip {
# usage: extract-zip c:\demo\myzip.zip c:\demo\destination
# originally from http://blogs.msdn.com/b/daiken/archive/2007/02/12/compress-files-with-windows-powershell-then-package-a-windows-vista-sidebar-gadget.aspx
#
# Copies every item of the zip file $ZIPname into the existing folder
# $destination by way of the Shell.Application COM object.
# If the zip file or the destination cannot be resolved, an error is written
# and the script is ended with EXIT.
param([string]$ZIPname, [string]$destination)
    $ZIPfile = Get-Item $ZIPname -ErrorVariable err -ErrorAction "SilentlyContinue" # gets the file as an object
    if ([String]::IsNullOrEmpty($err) -eq $false) {
        Write-Error "ERROR: $err Cannot find $ZIPname !!!"
        EXIT
    }

    $ZIPfolder = Get-Item $destination -ErrorVariable err -ErrorAction "SilentlyContinue" # gets the folder as an object
    if ([String]::IsNullOrEmpty($err) -eq $false) {
        # Report the requested path: $ZIPfolder is empty when the lookup failed.
        Write-Error "ERROR: $err Cannot find $destination !!!"
        EXIT
    }

    # FullName makes sure both paths handed to the shell object are absolute.
    $shellApplication = new-object -com shell.application
    $zipPackage = $shellApplication.NameSpace($ZIPfile.fullname)
    $destinationFolder = $shellApplication.NameSpace($ZIPfolder.fullname)
    if (($zipPackage -eq $null) -or ($destinationFolder -eq $null)) {
        # Without this guard a failed NameSpace() lookup would surface as an
        # unhelpful null-method-call error on the next line.
        Write-Error "ERROR: Cannot open $ZIPname or $destination through the shell !!!"
        EXIT
    }
    $destinationFolder.CopyHere($zipPackage.Items())
}

function AddSystemPaths([array] $PathsToAdd) {
  # originally from http://blogs.technet.com/b/sqlthoughts/archive/2008/12/12/powershell-function-to-add-system-path.aspx
  #
  # Appends each folder in $PathsToAdd to the machine-level PATH variable,
  # skipping folders that are already listed there.
  # The comparison is per entry (split on ';'), ignores case and a trailing
  # backslash, so "C:\Foo" does not count as present just because
  # "C:\FooBar" is in the path.

  # Read the machine-scope value rather than $Env:Path: the process PATH also
  # contains the current user's entries, which must not be written back into
  # the machine-wide setting.
  $machinePath = [System.Environment]::GetEnvironmentVariable("PATH", "Machine")
  if ($machinePath -eq $null) { $machinePath = "" }

  $existing = @($machinePath.Split(';') | Where-Object { $_ -ne "" } | ForEach-Object { $_.TrimEnd('\') })

  $VerifiedPathsToAdd = ""
  foreach ($Path in $PathsToAdd) {
    if ([String]::IsNullOrEmpty($Path)) { continue }
    $normalized = $Path.TrimEnd('\')
    if ($existing -contains $normalized) {
      echo "  $Path already in the path"
    }
    else {
      $VerifiedPathsToAdd += ";$Path"
      # Remember it so a duplicate inside $PathsToAdd is not added twice.
      $existing += $normalized
    }
  }

  if ($VerifiedPathsToAdd -ne "") {
    echo "Adding $VerifiedPathsToAdd to system path"
    # Trim stray separators so no empty entry is created at either end.
    $newPath = ($machinePath.TrimEnd(';') + $VerifiedPathsToAdd).TrimStart(';')
    [System.Environment]::SetEnvironmentVariable("PATH", $newPath, "Machine")
  }
}

function ConfigurePDFSearch {
    # Configures PDF crawling/indexing for SharePoint 2010. Steps, in order:
    #   1. download the PDF icon and copy it to every online "Application" server
    #   2. add "pdf" to the crawl extensions of the search service application(s)
    #   3. add a pdf mapping to DOCICON.XML on every online "Application" server
    #   4. download, unzip and install the Adobe PDF iFilter (64-bit)
    #   5. add the iFilter bin folder to the system path
    #   6. write the pdf filter entries to the registry and re-register the dll
    #   7. offer an IIS restart and restart the OSearch14 service
    # Uses the script-level $tempfolder and the functions Download, Extract-Zip,
    # AddSystemPaths and RestartIIS. Problems in a step are reported with
    # Write-Error and the remaining steps still run.
    Add-PSSnapin Microsoft.SharePoint.PowerShell -ErrorAction "SilentlyContinue" | Out-Null
    $farm = get-spfarm

    # --- 1. PDF icon -----------------------------------------------------------
    Download "http://www.adobe.com/images/pdficon_small.gif" "$tempfolder/pdf.gif"
    foreach ($Server in $farm.servers) {  # every online server with the Application role
        if (($Server.Role -eq "Application") -and ($Server.Status -eq "Online")) {
            Write-Output ""
            Write-Output ("Copies the PDF icon to the sharepoint folder on " + $Server.Name + "...")
            $DestFile = "\\" + $Server.name + "\c$\Program Files\Common Files\Microsoft Shared\Web Server Extensions\14\TEMPLATE\IMAGES\pdf.gif"
            copy-item "$tempfolder\pdf.gif" -destination $DestFile -ErrorVariable err -ErrorAction "SilentlyContinue"
            if ([String]::IsNullOrEmpty($err) -eq $true) { Write-Output " - Copy operation completed." }
            else { Write-Error "Copy ERROR: $err" }
        }
    }

    # --- 2. crawl extension ----------------------------------------------------
    Write-Output ""
    Write-Output "Adds PDF to the list of search extensions in the Search Appliation..."
    # Capture this cmdlet's own errors; otherwise the check below would look at
    # whatever an earlier command happened to leave in $err.
    $searchApp = Get-SPEnterpriseSearchServiceApplication -ErrorVariable err -ErrorAction "SilentlyContinue"
    if (([String]::IsNullOrEmpty($err) -ne $true) -or ($null -eq $searchApp)) {
        Write-Error "Error: Search Application is missing / not created yet : $err"
    }
    else {
        # The cmdlet may return more than one application; handle each one.
        foreach ($app in @($searchApp)) {
            # The result must NOT be piped to Out-Null, or it would always be
            # empty and the "already in the list" branch could never be taken.
            $PDFcheck = Get-SPEnterpriseSearchCrawlExtension "pdf" -SearchApplication $app -ErrorVariable err -ErrorAction "SilentlyContinue"
            if ($null -eq $PDFcheck) {
                New-SPEnterpriseSearchCrawlExtension "pdf" -SearchApplication $app -ErrorVariable err -ErrorAction "SilentlyContinue" | Out-Null
                if ([String]::IsNullOrEmpty($err) -eq $true) { Write-Output " - Add completed." }
                else { Write-Error "Error: $err" }
            }
            else {
                Write-Output " The PDF extension was already in the list"
            }
        }
    }

    # --- 3. DOCICON.XML --------------------------------------------------------
    foreach ($Server in $farm.servers) {  # every online server with the Application role
        if (($Server.Role -eq "Application") -and ($Server.Status -eq "Online")) {
            Write-Output ""
            Write-Output ("Adding pdfs as extension to docicons xml file on " + $Server.name)
            $XMLfile = "\\" + $Server.name + "\c$\Program Files\Common Files\Microsoft Shared\Web Server Extensions\14\TEMPLATE\XML\DOCICON.XML"
            [xml]$dociconxml = get-content $XMLfile -ErrorVariable err -ErrorAction "SilentlyContinue"
            if ([String]::IsNullOrEmpty($err) -eq $true) {
                $PNGelement = $dociconxml.DocIcons.ByExtension.Mapping | Where-Object { $_.Key -eq "png" }
                $PDFnode = $dociconxml.DocIcons.ByExtension.Mapping | Where-Object { $_.Key -eq "pdf" }
                if ($PDFnode.key -eq "pdf") {
                    write-output " - XML document was already updated."
                }
                else { # add a new pdf node to the xml document
                    # Save() is a .NET method, so a failure arrives as an
                    # exception and has to be caught rather than read from $err.
                    try {
                        $element = $dociconxml.DocIcons.ByExtension.Mapping[0].clone() # Duplicates an existing node
                        $element.key = "pdf"
                        $element.value = "pdf.gif"
                        $element.OpenControl = ""
                        $element.EditText = ""
                        $dociconxml.DocIcons.ByExtension.InsertBefore($element, $PNGelement) | Out-Null # Inserts the new node before the existing PNG element
                        $dociconxml.save($XMLfile)
                        Write-Output " - XML updated."
                    }
                    catch {
                        Write-Error "Update ERROR: $_"
                    }
                }
            }
            else { Write-Error "XML wasnt found: $err" }
        }
    }

    # --- 4. iFilter download and install ---------------------------------------
    Download "http://download.adobe.com/pub/adobe/acrobat/win/9.x/PDFiFilter64installer.zip" "$tempfolder\PDFiFilter64installer.zip"
    write-output " - Unzipping file..."
    extract-zip "$tempfolder\PDFiFilter64installer.zip" $tempfolder
    write-output ""
    write-output "Running the PDF iFilter installer..."
    # -PassThru returns the process object; Start-Process does not set
    # $LASTEXITCODE, so the installer result has to be read from ExitCode.
    # The msi path is quoted so a temp folder containing spaces still works.
    $proc = Start-Process C:\Windows\System32\msiexec.exe " /passive /i `"$tempfolder\PDFFilter64installer.msi`"" -Wait -PassThru -ErrorVariable err -ErrorAction "SilentlyContinue"
    if ($null -eq $proc) {
        Write-Error "Could not start the PDF iFilter installer: $err"
    }
    elseif ($proc.ExitCode -eq 0) {
        Write-Output " - OK"
    }
    else {
        Write-Output " - Probably OK (Installation returned error code: $($proc.ExitCode))"
    }

    # --- 5. system path --------------------------------------------------------
    write-output ""
    write-output "Adding the pdf dll path to system path..."
    AddSystemPaths("C:\Program Files\Adobe\Adobe PDF iFilter 9 for 64-bit platforms\bin\")

    # --- 6. registry -----------------------------------------------------------
    write-output ""
    write-output "Adding pdf entrys for Sharepoint Search in the registry..."
    $filterKey = 'registry::HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Office Server\14.0\Search\Setup\Filters\.pdf'
    $extensionKey = 'registry::HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Office Server\14.0\Search\Setup\ContentIndexCommon\Filters\Extension\.pdf'

    # Create the keys only when missing and overwrite values with -Force, so
    # running the script a second time does not fail on existing entries.
    if (-not (Test-Path -Path $filterKey)) { New-Item -Path $filterKey | Out-Null }
    New-ItemProperty -Path $filterKey -Name "Extension" -Value ".pdf" -PropertyType string -Force | Out-Null
    New-ItemProperty -Path $filterKey -Name "Mime Types" -Value "application/pdf" -PropertyType string -Force | Out-Null
    New-ItemProperty -Path $filterKey -Name "FileTypeBucket" -Value "1" -PropertyType dword -Force | Out-Null

    if (-not (Test-Path -Path $extensionKey)) { New-Item -Path $extensionKey | Out-Null }
    New-ItemProperty -Path $extensionKey -Name "(Default)" -Value "{E8978DA6-047F-4E3D-9C78-CDBE46041603}" -PropertyType string -Force | Out-Null

    write-output "Re-register the adobe iFilter dll..."
    regsvr32.exe "C:\Program Files\Adobe\Adobe PDF iFilter 9 for 64-bit platforms\bin\PDFFilter.dll"

    # --- 7. restarts -----------------------------------------------------------
    # Finally, issue an IISReset and restart the sharepoint search service
    RestartIIS
    Write-Output "Restarting the Search Service..."
    Stop-Service "OSearch14"
    Start-Service "OSearch14"

    # The returned text is not used; the prompt only keeps the window open.
    $exitprompt = Read-Host "Configuration complete. Press ENTER to exit"
}

# Entry point: run the complete PDF search configuration, then report completion.
ConfigurePDFSearch
"Done!"
--------------------------------------------------------------------------------


No comments: