20260311

Microsoft OfficeアプリからPDF作成した場合の『ToUnicode 逆引きバケ』が無いか?チェックするAppleScript(Swiftワンライナー)



概要・詳細はこちらを見てください
https://note.com/quicktimer/n/na7eff4e4a8bf


現象
Microsoft OfficeアプリからPDFを生成した際に
特定の文字に意図しないToUnicodeの値が付与されて、テキストが意図しない内容になることがある
この現象はMicrosoft Officeアプリ上でのフォントの指定に依存する
(特定のフォントで発生する)
回避策
A:Microsoft Officeのドキュメントを置換が発生しないフォントに変更する
B:オンラインサービス経由でPDF化する
C:PDFになってから置換する(実質無理がある)
D:テキストエディタで気づきやすくする
ここで紹介するのは
Cのように既にPDFになっている場合に、対象のPDFに問題の文字(ユニコード値が置換されて読めない文字)が無いか?をチェックするスクリプトです

【スクリプトエディタで開く】 |

CMAP置換チェックv2.scpt
ソース
#!/usr/bin/env osascript
#coding: utf-8
----+----1----+----2----+-----3----+----4----+----5----+----6----+----7--
(*

Scans a PDF that was created by Microsoft Office for characters whose
ToUnicode reverse mapping went wrong, and reports on which page they occur.

For every page the extracted text is embedded into a Swift one-liner that
prints each Unicode scalar falling inside a block list of code point ranges.
The report is written as "<PDF base name>.LOGs.txt" next to the PDF and
then opened.

v1 initial version
v1.1 enlarged the range map so that a wider set of characters is checked
v2 fixed the Swift code

com.cocolog-nifty.quicktimer.icefloe *)
----+----1----+----2----+-----3----+----4----+----5----+----6----+----7--
use AppleScript version "2.8"
use framework "Foundation"
use framework "AppKit"
use framework "PDFKit"
use scripting additions

property refMe : a reference to current application

####################
# File chooser dialog (starts at the user's Desktop)
set appFileManager to refMe's NSFileManager's defaultManager()
set ocidURLsArray to (appFileManager's URLsForDirectory:(refMe's NSDesktopDirectory) inDomains:(refMe's NSUserDomainMask))
set ocidDesktopDirPathURL to ocidURLsArray's firstObject()
set aliasDefaultLocation to (ocidDesktopDirPathURL's absoluteURL()) as alias
# Only PDF files are selectable
set listUTI to {"com.adobe.pdf"} as list
# Localized dialog message taken from the StandardAdditions bundle;
# falls back to loading the bundle from its path when it is not registered
set appBundle to refMe's NSBundle's bundleWithIdentifier:("com.apple.osax.standardadditions")
if appBundle = (missing value) then
   set strBundlePath to ("/System/Library/ScriptingAdditions/StandardAdditions.osax") as text
   set ocidBundlePathStr to refMe's NSString's stringWithString:(strBundlePath)
   set ocidBundlePath to ocidBundlePathStr's stringByStandardizingPath()
   set ocidBundlePathURL to refMe's NSURL's fileURLWithPath:(ocidBundlePath) isDirectory:(false)
   set appBundle to refMe's NSBundle's alloc()'s initWithURL:(ocidBundlePathURL)
end if
set strChooseAFile to (appBundle's localizedStringForKey:("Choose a File") value:("Choose a File") table:("Localizable")) as text

set strMes to ("PDF " & strChooseAFile & "") as text
set strPrompt to ("PDF " & strChooseAFile & "") as text
try
   tell application "SystemUIServer"
      activate
      set aliasFilePath to (choose file strMes with prompt strPrompt default location (aliasDefaultLocation) of type listUTI with invisibles and showing package contents without multiple selections allowed) as alias
   end tell
on error strErrMes number numErrNo
   # Cancelling the dialog also ends up here
   log strErrMes & numErrNo
   return false
end try
# Convert the chosen alias into a standardized file URL
set strFilePath to (POSIX path of aliasFilePath) as text
set ocidFilePathStr to refMe's NSString's stringWithString:(strFilePath)
set ocidFilePath to ocidFilePathStr's stringByStandardizingPath()
set ocidFilePathURL to refMe's NSURL's fileURLWithPath:(ocidFilePath) isDirectory:(false)
set ocidFileName to ocidFilePathURL's lastPathComponent()
set ocidBaseFileName to ocidFileName's stringByDeletingPathExtension()
####################
# Report destination: "<base name>.LOGs.txt" in the PDF's own folder
set ocidContainerDirPathURL to ocidFilePathURL's URLByDeletingLastPathComponent()
set strSaveLogFileName to ("" & ocidBaseFileName & ".LOGs.txt") as text
set ocidSaveFilePathURL to ocidContainerDirPathURL's URLByAppendingPathComponent:(strSaveLogFileName) isDirectory:(false)

####################
# PDFKit
set ocidOutPutString to refMe's NSMutableString's alloc()'s init()
# Read the PDF into NSData
set ocidOption to (refMe's NSDataReadingMappedIfSafe)
set listReadData to refMe's NSData's alloc()'s initWithContentsOfURL:(ocidFilePathURL) options:(ocidOption) |error|:(reference)
set ocidReadData to (first item of listReadData)
# Stop with a readable message instead of handing missing value to PDFKit
if ocidReadData = (missing value) then
   log (item 2 of listReadData)'s localizedDescription() as text
   return "PDFの読み込みに失敗しました"
end if
# Build the PDFDocument from the data
set ocidActiveDoc to refMe's PDFDocument's alloc()'s initWithData:(ocidReadData)
if ocidActiveDoc = (missing value) then
   return "PDFを開けませんでした"
end if
# Total page count
set numCntPage to ocidActiveDoc's pageCount()
# One pass per page (PDFKit page indexes are zero based)
repeat with itemNo from 0 to (numCntPage - 1) by 1
   set ocidActivePage to (ocidActiveDoc's pageAtIndex:(itemNo))
   # Extract the page text
   set ocidPageText to ocidActivePage's |string|()
   if ocidPageText = (missing value) then
      # Page without text: record that fact in the report
      set strSetValue to ("PAGE " & (itemNo + 1) & ":テキストが無いページです") as text
      (ocidOutPutString's appendString:(strSetValue))
      (ocidOutPutString's appendString:(linefeed))
   else
      set strSetValue to ("PAGE " & (itemNo + 1) & ": ") as text
      set ocidLineText to (refMe's NSMutableString's stringWithString:(strSetValue))
      # Normalize CRLF and CR to LF
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return & linefeed) withString:(linefeed))
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return) withString:(linefeed))
      # Escape for a Swift string literal. Backslashes must be doubled first,
      # otherwise the backslashes added by the following steps would be doubled too.
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\\") withString:("\\\\"))
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\"") withString:("\\\""))
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(linefeed) withString:("\\n"))
      set strPageText to (ocidPageText as text)
      # Swift one-liner: print "[char] (U+XXXX)" for every scalar inside the block list
      set strSwiftCode to ("import Foundation;let arrayBlackList: [ClosedRange<UInt32>] = [0x2E80...0x2EFF,0x2F00...0x2FDF,0x202A...0x202E,0x200B...0x200D,0x2060...0x206F,0xF900...0xFAFF,0x20000...0x2A6DF];var arraylogs: [String] = [];let argString = \"" & strPageText & "\";for charScalar in argString.unicodeScalars {let uintValue = charScalar.value;if arrayBlackList.contains(where: {$0.contains(uintValue) }) {let strLogEntry = \"[\\(charScalar)] (U+\\(String(format: \"%X\", uintValue)))\";arraylogs.append(strLogEntry);};};let strReturn = arraylogs.joined(separator: \"\\n\");print(strReturn);") as text
      # quoted form keeps apostrophes in the page text from terminating the shell argument
      set strStdOut to (do shell script "/usr/bin/swift -e " & (quoted form of strSwiftCode)) as text
      if strStdOut is "" then
         (ocidLineText's appendString:("-OK-"))
      else
         # do shell script returns CR line endings by default; keep the report LF only
         set ocidStdOut to (refMe's NSString's stringWithString:(strStdOut))
         set ocidStdOut to (ocidStdOut's stringByReplacingOccurrencesOfString:(return) withString:(linefeed))
         (ocidLineText's appendString:(ocidStdOut))
      end if
      (ocidOutPutString's appendString:(ocidLineText))
      (ocidOutPutString's appendString:(linefeed))
   end if
end repeat

# Save the report as UTF-8 text
set listDone to ocidOutPutString's writeToURL:(ocidSaveFilePathURL) atomically:(true) encoding:(refMe's NSUTF8StringEncoding) |error|:(reference)
if (item 1 of listDone) is true then
   # Open the saved report
   set appSharedWorkspace to refMe's NSWorkspace's sharedWorkspace()
   set boolDone to appSharedWorkspace's openURL:(ocidSaveFilePathURL)
   log "正常終了"
   return boolDone
else if (item 1 of listDone) is false then
   log (item 2 of listDone)'s localizedDescription() as text
   return "保存に失敗しました"
end if

AppleScriptで生成しました


【スクリプトエディタで開く】 |

CMAP置換チェックv1.1.scpt
ソース
#!/usr/bin/env osascript
#coding: utf-8
----+----1----+----2----+-----3----+----4----+----5----+----6----+----7--
(*

Scans a PDF that was created by Microsoft Office for characters whose
ToUnicode reverse mapping went wrong, and reports on which page they occur.

For every page the extracted text is embedded into Swift source that is
piped to "swift -"; the Swift code prints each Unicode scalar falling inside
a block list of code point ranges. The report is written as
"<PDF base name>.LOGs.txt" next to the PDF and then opened.

v1 initial version
v1.1 enlarged the range map so that a wider set of characters is checked

com.cocolog-nifty.quicktimer.icefloe *)
----+----1----+----2----+-----3----+----4----+----5----+----6----+----7--
use AppleScript version "2.8"
use framework "Foundation"
use framework "AppKit"
use framework "PDFKit"
use scripting additions

property refMe : a reference to current application

####################
# File chooser dialog (starts at the user's Desktop)
set appFileManager to refMe's NSFileManager's defaultManager()
set ocidURLsArray to (appFileManager's URLsForDirectory:(refMe's NSDesktopDirectory) inDomains:(refMe's NSUserDomainMask))
set ocidDesktopDirPathURL to ocidURLsArray's firstObject()
set aliasDefaultLocation to (ocidDesktopDirPathURL's absoluteURL()) as alias
# Only PDF files are selectable
set listUTI to {"com.adobe.pdf"} as list
# Localized dialog message taken from the StandardAdditions bundle;
# falls back to loading the bundle from its path when it is not registered
set appBundle to refMe's NSBundle's bundleWithIdentifier:("com.apple.osax.standardadditions")
if appBundle = (missing value) then
   set strBundlePath to ("/System/Library/ScriptingAdditions/StandardAdditions.osax") as text
   set ocidBundlePathStr to refMe's NSString's stringWithString:(strBundlePath)
   set ocidBundlePath to ocidBundlePathStr's stringByStandardizingPath()
   set ocidBundlePathURL to refMe's NSURL's fileURLWithPath:(ocidBundlePath) isDirectory:(false)
   set appBundle to refMe's NSBundle's alloc()'s initWithURL:(ocidBundlePathURL)
end if
set strChooseAFile to (appBundle's localizedStringForKey:("Choose a File") value:("Choose a File") table:("Localizable")) as text

set strMes to ("PDF " & strChooseAFile & "") as text
set strPrompt to ("PDF " & strChooseAFile & "") as text
try
   tell application "SystemUIServer"
      activate
      set aliasFilePath to (choose file strMes with prompt strPrompt default location (aliasDefaultLocation) of type listUTI with invisibles and showing package contents without multiple selections allowed) as alias
   end tell
on error strErrMes number numErrNo
   # Cancelling the dialog also ends up here
   log strErrMes & numErrNo
   return false
end try
# Convert the chosen alias into a standardized file URL
set strFilePath to (POSIX path of aliasFilePath) as text
set ocidFilePathStr to refMe's NSString's stringWithString:(strFilePath)
set ocidFilePath to ocidFilePathStr's stringByStandardizingPath()
set ocidFilePathURL to refMe's NSURL's fileURLWithPath:(ocidFilePath) isDirectory:(false)
set ocidFileName to ocidFilePathURL's lastPathComponent()
set ocidBaseFileName to ocidFileName's stringByDeletingPathExtension()
####################
# Report destination: "<base name>.LOGs.txt" in the PDF's own folder
set ocidContainerDirPathURL to ocidFilePathURL's URLByDeletingLastPathComponent()
set strSaveLogFileName to ("" & ocidBaseFileName & ".LOGs.txt") as text
set ocidSaveFilePathURL to ocidContainerDirPathURL's URLByAppendingPathComponent:(strSaveLogFileName) isDirectory:(false)

####################
# PDFKit
set ocidOutPutString to refMe's NSMutableString's alloc()'s init()
# Read the PDF into NSData
set ocidOption to (refMe's NSDataReadingMappedIfSafe)
set listReadData to refMe's NSData's alloc()'s initWithContentsOfURL:(ocidFilePathURL) options:(ocidOption) |error|:(reference)
set ocidReadData to (first item of listReadData)
# Stop with a readable message instead of handing missing value to PDFKit
if ocidReadData = (missing value) then
   log (item 2 of listReadData)'s localizedDescription() as text
   return "PDFの読み込みに失敗しました"
end if
# Build the PDFDocument from the data
set ocidActiveDoc to refMe's PDFDocument's alloc()'s initWithData:(ocidReadData)
if ocidActiveDoc = (missing value) then
   return "PDFを開けませんでした"
end if
# Total page count
set numCntPage to ocidActiveDoc's pageCount()
# One pass per page (PDFKit page indexes are zero based)
repeat with itemNo from 0 to (numCntPage - 1) by 1
   set ocidActivePage to (ocidActiveDoc's pageAtIndex:(itemNo))
   # Extract the page text
   set ocidPageText to ocidActivePage's |string|()
   if ocidPageText = (missing value) then
      # Page without text: record that fact in the report
      set strSetValue to ("PAGE " & (itemNo + 1) & ":テキストが無いページです") as text
      (ocidOutPutString's appendString:(strSetValue))
      (ocidOutPutString's appendString:(linefeed))
   else
      set strSetValue to ("PAGE " & (itemNo + 1) & ": ") as text
      set ocidLineText to (refMe's NSMutableString's stringWithString:(strSetValue))
      # Normalize CRLF and CR to LF
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return & linefeed) withString:(linefeed))
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return) withString:(linefeed))
      # Escape for a Swift string literal. Backslashes must be doubled first,
      # otherwise the backslashes added by the following steps would be doubled too.
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\\") withString:("\\\\"))
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\"") withString:("\\\""))
      set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(linefeed) withString:("\\n"))
      set strPageText to (ocidPageText as text)
      # Swift source: print "[char] (U+XXXX)" for every scalar inside the block list
      set strSwiftCode to ("import Foundation;import PDFKit;let arrayBlackList: [ClosedRange<UInt32>] = [0x2E80...0x2EFF,0x2F00...0x2FDF,0x202A...0x202E,0x200B...0x200D,0x2060...0x206F,0xF900...0xFAFF,0x20000...0x2A6DF];var arraylogs: [String] = [];let argString = \"" & strPageText & "\";for charScalar in argString.unicodeScalars {let uintValue = charScalar.value;if arrayBlackList.contains(where: {$0.contains(uintValue) }) {let strLogEntry = \"[\\(charScalar)] (U+\\(String(format: \"%X\", uintValue)))\";arraylogs.append(strLogEntry);};};let strReturn = arraylogs.joined(separator: \"\\n\");print(strReturn);") as text
      # Feed the source to swift on stdin. quoted form makes the shell treat the whole
      # source as one literal word, so quotes, $ and backticks coming from the PDF text
      # are neither expanded nor executed.
      set strCommand to ("/bin/echo " & (quoted form of strSwiftCode) & " | /usr/bin/swift -") as text
      # Run the check
      set strStdOut to (do shell script strCommand) as text
      # do shell script returns CR line endings by default; keep the report LF only
      set ocidStdOut to (refMe's NSString's stringWithString:(strStdOut))
      set ocidStdOut to (ocidStdOut's stringByReplacingOccurrencesOfString:(return) withString:(linefeed))
      (ocidLineText's appendString:(ocidStdOut))
      (ocidOutPutString's appendString:(ocidLineText))
      (ocidOutPutString's appendString:(linefeed))
   end if
end repeat

# Save the report as UTF-8 text
set listDone to ocidOutPutString's writeToURL:(ocidSaveFilePathURL) atomically:(true) encoding:(refMe's NSUTF8StringEncoding) |error|:(reference)
if (item 1 of listDone) is true then
   # Open the saved report
   set appSharedWorkspace to refMe's NSWorkspace's sharedWorkspace()
   set boolDone to appSharedWorkspace's openURL:(ocidSaveFilePathURL)
   log "正常終了"
   return boolDone
else if (item 1 of listDone) is false then
   log (item 2 of listDone)'s localizedDescription() as text
   return "保存に失敗しました"
end if

AppleScriptで生成しました