
概要・詳細はこちらを見てください
https://note.com/quicktimer/n/na7eff4e4a8bf
現象
Microsoft OfficeアプリからPDFを生成した際に
特定の文字に意図しないToUnicodeの値が付与され、意図しない内容になることがある
Microsoft Officeアプリ上でのフォントの指定に依存がある
(特定のフォントで発生する)
回避策
A:Microsoft Officeのドキュメントを置換が発生しないフォントに変更する
B:オンラインサービス経由でPDF化する
C:PDFになってから置換する(実質無理がある)
D:テキストエディタで気づきやすくする
ここでは
Cのように既にPDFになっている場合に、対象のPDFに問題の文字(ユニコード値が置換されて読めない文字)が無いかをチェックするスクリプトです
| ソース | |
|---|---|
| 001 | #!/usr/bin/env osascript |
| 002 | #coding: utf-8 |
| 003 | ----+----1----+----2----+-----3----+----4----+----5----+----6----+----7-- |
| 004 | (* |
| 005 | |
| 006 | For PDFs created with Microsoft Office: |
| 007 | when ToUnicode reverse-mapping garbling has occurred, |
| 008 | this script checks where the affected characters are. |
| 009 | |
| 010 | The check result is written out as a text file. |
| 011 | |
| 012 | v1 Initial version |
| 013 | v1.1 Added a few more ranges to the map so a wider set of characters can be checked |
| 014 | v2 Fixed the Swift code |
| 015 | |
| 016 | |
| 017 | com.cocolog-nifty.quicktimer.icefloe *) |
| 018 | ----+----1----+----2----+-----3----+----4----+----5----+----6----+----7-- |
| 019 | use AppleScript version "2.8" |
| 020 | use framework "Foundation" |
| 021 | use framework "AppKit" |
| 022 | use framework "PDFKit" |
| 023 | use scripting additions |
| 024 | |
| 025 | property refMe : a reference to current application |
| 026 | |
| 027 | #################### |
| 028 | # Dialog: ask the user to pick a single PDF file; the chooser opens at the user's Desktop folder |
| 029 | set appFileManager to refMe's NSFileManager's defaultManager() |
| 030 | set ocidURLsArray to (appFileManager's URLsForDirectory:(refMe's NSDesktopDirectory) inDomains:(refMe's NSUserDomainMask)) |
| 031 | set ocidDesktopDirPathURL to ocidURLsArray's firstObject() |
| 032 | set aliasDefaultLocation to (ocidDesktopDirPathURL's absoluteURL()) as alias |
| 033 | # UTI list that restricts the chooser to PDF files |
| 034 | set listUTI to {"com.adobe.pdf"} as list |
| 035 | # Localized message: look up "Choose a File" in the StandardAdditions bundle, loading the bundle by path when the identifier lookup returns missing value |
| 036 | set appBundle to refMe's NSBundle's bundleWithIdentifier:("com.apple.osax.standardadditions") |
| 037 | if appBundle = (missing value) then |
| 038 | set strBundlePath to ("/System/Library/ScriptingAdditions/StandardAdditions.osax") as text |
| 039 | set ocidBundlePathStr to refMe's NSString's stringWithString:(strBundlePath) |
| 040 | set ocidBundlePath to ocidBundlePathStr's stringByStandardizingPath() |
| 041 | set ocidBundlePathURL to refMe's NSURL's fileURLWithPath:(ocidBundlePath) isDirectory:(false) |
| 042 | set appBundle to refMe's NSBundle's alloc()'s initWithURL:(ocidBundlePathURL) |
| 043 | end if |
| 044 | set strChooseAFile to (appBundle's localizedStringForKey:("Choose a File") value:("Choose a File") table:("Localizable")) as text |
| 045 | |
| 046 | set strMes to ("PDF " & strChooseAFile & "") as text |
| 047 | set strPrompt to ("PDF " & strChooseAFile & "") as text |
| 048 | try |
| 049 | tell application "SystemUIServer" |
| 050 | activate |
| 051 | set aliasFilePath to (choose file strMes with prompt strPrompt default location (aliasDefaultLocation) of type listUTI with invisibles and showing package contents without multiple selections allowed) as alias |
| 052 | end tell |
| 053 | on error strErrMes number numErrNo |
| 054 | log strErrMes & numErrNo |
| 055 | return false |
| 056 | end try |
| 057 | # Convert the chosen alias into a file NSURL and derive the file name without its extension |
| 058 | set strFilePath to (POSIX path of aliasFilePath) as text |
| 059 | set ocidFilePathStr to refMe's NSString's stringWithString:(strFilePath) |
| 060 | set ocidFilePath to ocidFilePathStr's stringByStandardizingPath() |
| 061 | set ocidFilePathURL to refMe's NSURL's fileURLWithPath:(ocidFilePath) isDirectory:(false) |
| 062 | set ocidFileName to ocidFilePathURL's lastPathComponent() |
| 063 | set ocidBaseFileName to ocidFileName's stringByDeletingPathExtension() |
| 064 | #################### |
| 065 | # Save destination: "<base name>.LOGs.txt" in the same folder as the chosen PDF |
| 066 | set ocidContainerDirPathURL to ocidFilePathURL's URLByDeletingLastPathComponent() |
| 067 | set strSaveLogFileName to ("" & ocidBaseFileName & ".LOGs.txt") as text |
| 068 | set ocidSaveFilePathURL to ocidContainerDirPathURL's URLByAppendingPathComponent:(strSaveLogFileName) isDirectory:(false) |
| 069 | |
| 070 | #################### |
| 071 | # PDFKit: scan every page of the chosen PDF and collect one report line per page |
| 072 | set ocidOutPutString to refMe's NSMutableString's alloc()'s init() |
| 073 | # Read the PDF into NSData |
| 074 | set ocidOption to (refMe's NSDataReadingMappedIfSafe) |
| 075 | set listReadData to refMe's NSData's alloc()'s initWithContentsOfURL:(ocidFilePathURL) options:(ocidOption) |error|:(reference) |
| 076 | set ocidReadData to (first item of listReadData) |
| 077 | # Stop when the file could not be read; item 2 of the result holds the NSError |
| 078 | if ocidReadData = (missing value) then |
| 079 | log (item 2 of listReadData)'s localizedDescription() as text |
| 080 | return false |
| 081 | end if |
| 082 | # Hand the data to PDFDocument; stop when no document could be created from it |
| 083 | set ocidActiveDoc to refMe's PDFDocument's alloc()'s initWithData:(ocidReadData) |
| 084 | if ocidActiveDoc = (missing value) then |
| 085 | log "PDFを開けませんでした" |
| 086 | return false |
| 087 | end if |
| 088 | # Total number of pages |
| 089 | set numCntPage to ocidActiveDoc's pageCount() |
| 090 | # Swift checker source, split around the spot where the page text is inserted as a string literal (built once, outside the loop) |
| 091 | set strSwiftHead to ("import Foundation;let arrayBlackList: [ClosedRange<UInt32>] = [0x2E80...0x2EFF,0x2F00...0x2FDF,0x202A...0x202E,0x200B...0x200D,0x2060...0x206F,0xF900...0xFAFF,0x20000...0x2A6DF];var arraylogs: [String] = [];let argString = \"") as text |
| 092 | set strSwiftTail to ("\";for charScalar in argString.unicodeScalars {let uintValue = charScalar.value;if arrayBlackList.contains(where: {$0.contains(uintValue) }) {let strLogEntry = \"[\\(charScalar)] (U+\\(String(format: \"%X\", uintValue)))\";arraylogs.append(strLogEntry);};};let strReturn = arraylogs.joined(separator: \"\\n\");print(strReturn);") as text |
| 093 | # Repeat for every page (PDFKit page indexes are zero-based) |
| 094 | repeat with itemNo from 0 to (numCntPage - 1) by 1 |
| 095 | # Fetch the page |
| 096 | set ocidActivePage to (ocidActiveDoc's pageAtIndex:(itemNo)) |
| 097 | # Extract its text |
| 098 | set ocidPageText to ocidActivePage's |string|() |
| 099 | # The page has no text |
| 100 | if ocidPageText = (missing value) then |
| 101 | # Record the notice in the report (previously it was built but never written out) |
| 102 | set strSetValue to ("PAGE " & (itemNo + 1) & ":テキストが無いページです") as text |
| 103 | (ocidOutPutString's appendString:(strSetValue)) |
| 104 | (ocidOutPutString's appendString:(linefeed)) |
| 105 | else |
| 106 | set strSetValue to ("PAGE " & (itemNo + 1) & ": ") as text |
| 107 | set ocidLineText to (refMe's NSMutableString's stringWithString:(strSetValue)) |
| 108 | # Force every line break to LF |
| 109 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return & linefeed) withString:(linefeed)) |
| 110 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return) withString:(linefeed)) |
| 111 | # Escape backslashes first, then double quotes, so the page text cannot terminate or alter the Swift string literal |
| 112 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\\") withString:("\\\\")) |
| 113 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\"") withString:("\\\"")) |
| 114 | # Turn each LF into the Swift escape sequence; this must come after the backslash escaping above |
| 115 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(linefeed) withString:("\\n")) |
| 116 | # Assemble the Swift source for this page |
| 117 | set strSwiftCode to (strSwiftHead & (ocidPageText as text) & strSwiftTail) as text |
| 118 | # Run the check; quoted form keeps single quotes in the page text from breaking the shell command |
| 119 | set strStdOut to (do shell script ("/usr/bin/swift -e " & (quoted form of strSwiftCode))) as text |
| 120 | if strStdOut is "" then |
| 121 | (ocidLineText's appendString:("-OK-")) |
| 122 | else |
| 123 | # do shell script hands back CR line endings; convert them so the log uses LF throughout |
| 124 | set ocidStdOut to (refMe's NSString's stringWithString:(strStdOut)) |
| 125 | set ocidStdOut to (ocidStdOut's stringByReplacingOccurrencesOfString:(return) withString:(linefeed)) |
| 126 | (ocidLineText's appendString:(ocidStdOut)) |
| 127 | end if |
| 128 | (ocidOutPutString's appendString:(ocidLineText)) |
| 129 | (ocidOutPutString's appendString:(linefeed)) |
| 130 | end if |
| 131 | end repeat |
| 132 | |
| 133 | # Save the check result as a UTF-8 text file |
| 134 | set listDone to ocidOutPutString's writeToURL:(ocidSaveFilePathURL) atomically:(true) encoding:(refMe's NSUTF8StringEncoding) |error|:(reference) |
| 135 | if (item 1 of listDone) is true then |
| 136 | # Open the saved text file |
| 137 | set appSharedWorkspace to refMe's NSWorkspace's sharedWorkspace() |
| 138 | set boolDone to appSharedWorkspace's openURL:(ocidSaveFilePathURL) |
| 139 | log "正常終了" |
| 140 | return boolDone |
| 141 | else if (item 1 of listDone) is false then |
| 142 | log (item 2 of listDone)'s localizedDescription() as text |
| 143 | return "保存に失敗しました" |
| 144 | end if |
| 145 | |
| AppleScriptで生成しました | |
| ソース | |
|---|---|
| 001 | #!/usr/bin/env osascript |
| 002 | #coding: utf-8 |
| 003 | ----+----1----+----2----+-----3----+----4----+----5----+----6----+----7-- |
| 004 | (* |
| 005 | |
| 006 | For PDFs created with Microsoft Office: |
| 007 | when ToUnicode reverse-mapping garbling has occurred, |
| 008 | this script checks where the affected characters are. |
| 009 | |
| 010 | The check result is written out as a text file. |
| 011 | |
| 012 | v1 Initial version |
| 013 | v1.1 Added a few more ranges to the map so a wider set of characters can be checked |
| 014 | |
| 015 | |
| 016 | |
| 017 | com.cocolog-nifty.quicktimer.icefloe *) |
| 018 | ----+----1----+----2----+-----3----+----4----+----5----+----6----+----7-- |
| 019 | use AppleScript version "2.8" |
| 020 | use framework "Foundation" |
| 021 | use framework "AppKit" |
| 022 | use framework "PDFKit" |
| 023 | use scripting additions |
| 024 | |
| 025 | property refMe : a reference to current application |
| 026 | |
| 027 | #################### |
| 028 | # Dialog: ask the user to pick a single PDF file; the chooser opens at the user's Desktop folder |
| 029 | set appFileManager to refMe's NSFileManager's defaultManager() |
| 030 | set ocidURLsArray to (appFileManager's URLsForDirectory:(refMe's NSDesktopDirectory) inDomains:(refMe's NSUserDomainMask)) |
| 031 | set ocidDesktopDirPathURL to ocidURLsArray's firstObject() |
| 032 | set aliasDefaultLocation to (ocidDesktopDirPathURL's absoluteURL()) as alias |
| 033 | # UTI list that restricts the chooser to PDF files |
| 034 | set listUTI to {"com.adobe.pdf"} as list |
| 035 | # Localized message: look up "Choose a File" in the StandardAdditions bundle, loading the bundle by path when the identifier lookup returns missing value |
| 036 | set appBundle to refMe's NSBundle's bundleWithIdentifier:("com.apple.osax.standardadditions") |
| 037 | if appBundle = (missing value) then |
| 038 | set strBundlePath to ("/System/Library/ScriptingAdditions/StandardAdditions.osax") as text |
| 039 | set ocidBundlePathStr to refMe's NSString's stringWithString:(strBundlePath) |
| 040 | set ocidBundlePath to ocidBundlePathStr's stringByStandardizingPath() |
| 041 | set ocidBundlePathURL to refMe's NSURL's fileURLWithPath:(ocidBundlePath) isDirectory:(false) |
| 042 | set appBundle to refMe's NSBundle's alloc()'s initWithURL:(ocidBundlePathURL) |
| 043 | end if |
| 044 | set strChooseAFile to (appBundle's localizedStringForKey:("Choose a File") value:("Choose a File") table:("Localizable")) as text |
| 045 | |
| 046 | set strMes to ("PDF " & strChooseAFile & "") as text |
| 047 | set strPrompt to ("PDF " & strChooseAFile & "") as text |
| 048 | try |
| 049 | tell application "SystemUIServer" |
| 050 | activate |
| 051 | set aliasFilePath to (choose file strMes with prompt strPrompt default location (aliasDefaultLocation) of type listUTI with invisibles and showing package contents without multiple selections allowed) as alias |
| 052 | end tell |
| 053 | on error strErrMes number numErrNo |
| 054 | log strErrMes & numErrNo |
| 055 | return false |
| 056 | end try |
| 057 | # Convert the chosen alias into a file NSURL and derive the file name without its extension |
| 058 | set strFilePath to (POSIX path of aliasFilePath) as text |
| 059 | set ocidFilePathStr to refMe's NSString's stringWithString:(strFilePath) |
| 060 | set ocidFilePath to ocidFilePathStr's stringByStandardizingPath() |
| 061 | set ocidFilePathURL to refMe's NSURL's fileURLWithPath:(ocidFilePath) isDirectory:(false) |
| 062 | set ocidFileName to ocidFilePathURL's lastPathComponent() |
| 063 | set ocidBaseFileName to ocidFileName's stringByDeletingPathExtension() |
| 064 | #################### |
| 065 | # Save destination: "<base name>.LOGs.txt" in the same folder as the chosen PDF |
| 066 | set ocidContainerDirPathURL to ocidFilePathURL's URLByDeletingLastPathComponent() |
| 067 | set strSaveLogFileName to ("" & ocidBaseFileName & ".LOGs.txt") as text |
| 068 | set ocidSaveFilePathURL to ocidContainerDirPathURL's URLByAppendingPathComponent:(strSaveLogFileName) isDirectory:(false) |
| 069 | |
| 070 | #################### |
| 071 | # PDFKit: scan every page of the chosen PDF and collect one report line per page |
| 072 | set ocidOutPutString to refMe's NSMutableString's alloc()'s init() |
| 073 | # Read the PDF into NSData |
| 074 | set ocidOption to (refMe's NSDataReadingMappedIfSafe) |
| 075 | set listReadData to refMe's NSData's alloc()'s initWithContentsOfURL:(ocidFilePathURL) options:(ocidOption) |error|:(reference) |
| 076 | set ocidReadData to (first item of listReadData) |
| 077 | # Stop when the file could not be read; item 2 of the result holds the NSError |
| 078 | if ocidReadData = (missing value) then |
| 079 | log (item 2 of listReadData)'s localizedDescription() as text |
| 080 | return false |
| 081 | end if |
| 082 | # Hand the data to PDFDocument; stop when no document could be created from it |
| 083 | set ocidActiveDoc to refMe's PDFDocument's alloc()'s initWithData:(ocidReadData) |
| 084 | if ocidActiveDoc = (missing value) then |
| 085 | log "PDFを開けませんでした" |
| 086 | return false |
| 087 | end if |
| 088 | # Total number of pages |
| 089 | set numCntPage to ocidActiveDoc's pageCount() |
| 090 | # Swift checker source, split around the spot where the page text is inserted as a string literal (built once, outside the loop) |
| 091 | set strSwiftHead to ("import Foundation;import PDFKit;let arrayBlackList: [ClosedRange<UInt32>] = [0x2E80...0x2EFF,0x2F00...0x2FDF,0x202A...0x202E,0x200B...0x200D,0x2060...0x206F,0xF900...0xFAFF,0x20000...0x2A6DF];var arraylogs: [String] = [];let argString = \"") as text |
| 092 | set strSwiftTail to ("\";for charScalar in argString.unicodeScalars {let uintValue = charScalar.value;if arrayBlackList.contains(where: {$0.contains(uintValue) }) {let strLogEntry = \"[\\(charScalar)] (U+\\(String(format: \"%X\", uintValue)))\";arraylogs.append(strLogEntry);};};let strReturn = arraylogs.joined(separator: \"\\n\");print(strReturn);") as text |
| 093 | # Repeat for every page (PDFKit page indexes are zero-based) |
| 094 | repeat with itemNo from 0 to (numCntPage - 1) by 1 |
| 095 | # Fetch the page |
| 096 | set ocidActivePage to (ocidActiveDoc's pageAtIndex:(itemNo)) |
| 097 | # Extract its text |
| 098 | set ocidPageText to ocidActivePage's |string|() |
| 099 | # The page has no text |
| 100 | if ocidPageText = (missing value) then |
| 101 | # Record the notice in the report (previously it was built but never written out) |
| 102 | set strSetValue to ("PAGE " & (itemNo + 1) & ":テキストが無いページです") as text |
| 103 | (ocidOutPutString's appendString:(strSetValue)) |
| 104 | (ocidOutPutString's appendString:(linefeed)) |
| 105 | else |
| 106 | set strSetValue to ("PAGE " & (itemNo + 1) & ": ") as text |
| 107 | set ocidLineText to (refMe's NSMutableString's stringWithString:(strSetValue)) |
| 108 | # Force every line break to LF |
| 109 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return & linefeed) withString:(linefeed)) |
| 110 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(return) withString:(linefeed)) |
| 111 | # Escape backslashes first, then double quotes, so the page text cannot terminate or alter the Swift string literal |
| 112 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\\") withString:("\\\\")) |
| 113 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:("\"") withString:("\\\"")) |
| 114 | # Turn each LF into the Swift escape sequence; this must come after the backslash escaping above |
| 115 | set ocidPageText to (ocidPageText's stringByReplacingOccurrencesOfString:(linefeed) withString:("\\n")) |
| 116 | # Assemble the Swift source for this page |
| 117 | set strSwiftSource to (strSwiftHead & (ocidPageText as text) & strSwiftTail) as text |
| 118 | # Pipe the source into swift on stdin; quoted form replaces the hand-written nested shell quoting, so quotes, $ and backticks in the page text stay literal |
| 119 | set strCommandText to ("/bin/echo " & (quoted form of strSwiftSource) & " | /usr/bin/swift -") as text |
| 120 | # Run the check |
| 121 | set strStdOut to (do shell script strCommandText) as text |
| 122 | # do shell script hands back CR line endings; convert them so the log uses LF throughout |
| 123 | set ocidStdOut to (refMe's NSString's stringWithString:(strStdOut)) |
| 124 | set ocidStdOut to (ocidStdOut's stringByReplacingOccurrencesOfString:(return) withString:(linefeed)) |
| 125 | (ocidLineText's appendString:(ocidStdOut)) |
| 126 | (ocidOutPutString's appendString:(ocidLineText)) |
| 127 | (ocidOutPutString's appendString:(linefeed)) |
| 128 | end if |
| 129 | end repeat |
| 130 | |
| 131 | # Save the check result as a UTF-8 text file |
| 132 | set listDone to ocidOutPutString's writeToURL:(ocidSaveFilePathURL) atomically:(true) encoding:(refMe's NSUTF8StringEncoding) |error|:(reference) |
| 133 | if (item 1 of listDone) is true then |
| 134 | # Open the saved text file |
| 135 | set appSharedWorkspace to refMe's NSWorkspace's sharedWorkspace() |
| 136 | set boolDone to appSharedWorkspace's openURL:(ocidSaveFilePathURL) |
| 137 | log "正常終了" |
| 138 | return boolDone |
| 139 | else if (item 1 of listDone) is false then |
| 140 | log (item 2 of listDone)'s localizedDescription() as text |
| 141 | return "保存に失敗しました" |
| 142 | end if |
| 143 | |
| AppleScriptで生成しました | |
