Макрос импорта из MS Word

Материал из свободной русской энциклопедии «Традиция»
Перейти к навигации Перейти к поиску

Этот макрос импорта (доработанный макрос от http://www.infpro.com/downloads/downloads/wordmedia.htm), пытается эвристически преобразовать MS Word разметку в текстовую разметку Традиция.

Скопируйте текст макроса в буфер обмена, перейдите в Word, откройте Редактор Visual Basic клавишами Alt-F11, вставьте текст в шаблон Normal. Сохраните шаблон, затем используйте Alt-F8 для вызова и запуска макроса.

Обратите внимание: макрос «разрушает» исходный документ, преобразовывая его в текстовую разметку Традиция, поэтому позаботьтесь о backup-е!

<code-vb> Option Explicit

Sub Word2MediaWiki()

   Application.ScreenUpdating = False
   MediaWikiEscapeChars
   SplitParagraphs
   MediaWikiConvertHyperlinks
   MediaWikiConvertItalic
   MediaWikiConvertBold
   MediaWikiConvertH1
   MediaWikiConvertH2
   MediaWikiConvertH3
   MediaWikiConvertH4
   MediaWikiConvertH5
   MediaWikiConvertUnderline
   MediaWikiConvertStrikeThrough
   MediaWikiConvertSuperscript
   MediaWikiConvertSubscript
   MediaWikiConvertLists
   MediaWikiConvertTables
   ' Copy to clipboard
  ActiveDocument.Content.Copy
  Application.ScreenUpdating = True

End Sub

Private Sub MediaWikiConvertH1()

   ReplaceHeading wdStyleHeading1, "="

End Sub

Private Sub MediaWikiConvertH2()

   ReplaceHeading wdStyleHeading2, "=="

End Sub

Private Sub MediaWikiConvertH3()

   ReplaceHeading wdStyleHeading3, "==="

End Sub

Private Sub MediaWikiConvertH4()

   ReplaceHeading wdStyleHeading4, "===="

End Sub

Private Sub MediaWikiConvertH5()

   ReplaceHeading wdStyleHeading5, "====="

End Sub

Private Sub MediaWikiConvertBold()

   ActiveDocument.Select
   With Selection.Find
       .ClearFormatting
       .Font.Bold = True
       .Text = ""
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
       .Forward = True
       .Wrap = wdFindContinue
       Do While .Execute
           With Selection
               If Len(.Text) > 1 And InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore ""
                   .InsertAfter ""
               End If
               .style = ActiveDocument.styles(wdStyleDefaultParagraphFont)
               .Font.Bold = False
           End With
       Loop
   End With

End Sub

Private Sub MediaWikiConvertItalic()

   ActiveDocument.Select
   With Selection.Find
       .ClearFormatting
       .Font.Italic = True
       .Text = ""
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
       .Forward = True
       .Wrap = wdFindContinue
       Do While .Execute
           With Selection
               If Len(.Text) > 1 And InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
                                   
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore ""
                   .InsertAfter ""
               End If
               .style = ActiveDocument.styles(wdStyleDefaultParagraphFont)
               .Font.Italic = False
           End With
       Loop
   End With

End Sub

Private Sub MediaWikiConvertUnderline()

   ActiveDocument.Select
   With Selection.Find
       .ClearFormatting
       .Font.Underline = True
       .Text = ""
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
       .Forward = True
       .Wrap = wdFindContinue
       Do While .Execute
           With Selection
              If Len(.Text) > 1 And InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore ""
                   .InsertAfter ""
               End If
               .style = ActiveDocument.styles(wdStyleDefaultParagraphFont)
               .Font.Underline = False
           End With
       Loop
   End With

End Sub

Private Sub MediaWikiConvertStrikeThrough()

   ActiveDocument.Select
   With Selection.Find
       .ClearFormatting
       .Font.StrikeThrough = True
       .Text = ""
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
       .Forward = True
       .Wrap = wdFindContinue
       Do While .Execute
           With Selection
               If Len(.Text) > 1 And InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
                                     
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore "-"
                   .InsertAfter "-"
               End If
               
               .style = ActiveDocument.styles(wdStyleDefaultParagraphFont)
               .Font.StrikeThrough = False
           End With
       Loop
   End With

End Sub

Private Sub MediaWikiConvertSuperscript()

   ActiveDocument.Select
   With Selection.Find
       .ClearFormatting
       .Font.Superscript = True
       .Text = ""
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
       .Forward = True
       .Wrap = wdFindContinue
       Do While .Execute
           With Selection
               .Text = Trim(.Text)
               If Len(.Text) > 1 And InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore "^"
                   .InsertAfter "^"
               End If
               .style = ActiveDocument.styles(wdStyleDefaultParagraphFont)
               .Font.Superscript = False
           End With
       Loop
   End With

End Sub

Private Sub MediaWikiConvertSubscript()

   ActiveDocument.Select
   With Selection.Find
       .ClearFormatting
       .Font.Subscript = True
       .Text = ""
      
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
      
       .Forward = True
       .Wrap = wdFindContinue
      
       Do While .Execute
           With Selection
               .Text = Trim(.Text)
               If Len(.Text) > 1 And InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
                                      
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore "~"
                   .InsertAfter "~"
               End If
              
               .style = ActiveDocument.styles(wdStyleDefaultParagraphFont)
               .Font.Subscript = False
           End With
       Loop
   End With

End Sub

Private Sub MediaWikiConvertLists()

   Dim para As Paragraph
   Dim i As Integer
   For Each para In ActiveDocument.ListParagraphs
       With para.Range
           .InsertBefore " "
           For i = 1 To .ListFormat.ListLevelNumber
               If .ListFormat.ListType = wdListBullet Then
                   .InsertBefore "*"
               Else
                   .InsertBefore "#"
               End If
           Next i
           .ListFormat.RemoveNumbers
       End With
   Next para

End Sub

Private Sub MediaWikiConvertTables()

   Dim thisTable As Table
   Dim aRow, aCell As Object
   For Each thisTable In ActiveDocument.Tables
       With thisTable
           For Each aRow In thisTable.Rows
               With aRow
               For Each aCell In aRow.Cells
                   With aCell
                       aCell.Range.InsertBefore "|"
                       'aCell.Range.InsertAfter "|"
                   End With
               Next aCell
               '.Range.InsertBefore "|"
               .Range.InsertAfter vbCrLf + "|-"
               End With
           Next aRow
       .Range.InsertBefore "{|" + vbCrLf
       .Range.InsertAfter vbCrLf + "|}"
       .ConvertToText "|"
       End With
   Next thisTable

End Sub

Private Sub MediaWikiConvertHyperlinks()

   Dim hyperCount As Integer
   Dim i As Integer
  
   hyperCount = ActiveDocument.Hyperlinks.Count
   
   For i = 1 To hyperCount
       With ActiveDocument.Hyperlinks(1)
           Dim addr As String
           addr = .Address
           .Delete
           .Range.InsertBefore "["
           .Range.InsertAfter "-" & addr & "]"
       End With
   Next i

End Sub

Private Sub MediaWikiEscapeChars()

   EscapeCharacter "*"
   EscapeCharacter "#"
   EscapeCharacter "{"
   EscapeCharacter "}"
   EscapeCharacter "["
   EscapeCharacter "]"
   EscapeCharacter "~"
   EscapeCharacter "^^"
   EscapeCharacter "|"

End Sub

Private Function ReplaceHeading(styleHeading As String, headerPrefix As String)

   Dim normalStyle As style
   Set normalStyle = ActiveDocument.styles(wdStyleNormal)
  
   ActiveDocument.Select
  
   With Selection.Find
  
       .ClearFormatting
       .style = ActiveDocument.styles(styleHeading)
       .Text = ""

      
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
      
       .Forward = True
       .Wrap = wdFindContinue
      
       Do While .Execute
           With Selection
               If InStr(1, .Text, vbCr) Then
                   ' Just process the chunk before any newline characters
                   ' We'll pick-up the rest with the next search
                   .Collapse
                   .MoveEndUntil vbCr
               End If
                                      
               ' Don't bother to markup newline characters (prevents a loop, as well)
               If Not .Text = vbCr Then
                   .InsertBefore headerPrefix
                   .InsertBefore vbCr
                   .InsertAfter headerPrefix
               End If
               .style = normalStyle
           End With
       Loop
   End With

End Function

Private Function SplitParagraphs()

 'All wdStyleNormal -> wdStyleNormalIndent
  Dim styles As New Collection
  styles.Add (wdStylePlainText)
  styles.Add (wdStyleNormal)
  styles.Add (wdStyleBodyText)
  styles.Add (wdStyleBodyText2)
  styles.Add (wdStyleBodyText3)
  Dim style As Variant
  For Each style In styles
       ActiveDocument.Select
       With Selection.Find
             .ClearFormatting
             .style = ActiveDocument.styles(wdStyleNormal)
             .Text = ""
             .Format = True
             .MatchCase = False
             .MatchWholeWord = False
             .MatchWildcards = False
             .MatchSoundsLike = False
             .MatchAllWordForms = False
             .Forward = True
             .Wrap = wdFindContinue
             Do While .Execute
                   With Selection
                     Dim fl As Variant
                     Set fl = .Range.ListFormat
                     If IsEmpty(fl) Then
                       .style = ActiveDocument.styles(wdStyleNormalIndent)
                     End If
                   End With
             Loop
        End With
   Next
  
   ActiveDocument.Select
   With Selection.Find
       Dim dbCr As Variant
       dbCr = vbCr + vbCr
       .ClearFormatting
       .style = ActiveDocument.styles(wdStyleNormalIndent)
       .Text = ""
       .Format = True
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
      
       .Forward = True
       .Wrap = wdFindContinue
      
       Do While .Execute
           With Selection
               .InsertBefore vbCr
               .style = ActiveDocument.styles(wdStyleNormal)
           End With
       Loop
    End With

End Function


Private Function EscapeCharacter(char As String)

   ReplaceString char, "\" & char

End Function

Private Function ReplaceString(findStr As String, replacementStr As String)

   Selection.Find.ClearFormatting
   Selection.Find.Replacement.ClearFormatting
   With Selection.Find
       .Text = findStr
       .Replacement.Text = replacementStr
       .Forward = True
       .Wrap = wdFindContinue
       .Format = False
       .MatchCase = False
       .MatchWholeWord = False
       .MatchWildcards = False
       .MatchSoundsLike = False
       .MatchAllWordForms = False
   End With
   Selection.Find.Execute Replace:=wdReplaceAll

End Function </code-vb>



По крайней мере часть этого текста взята с ресурса http://lib.custis.ru/ под лицензией GDFL.Список авторов доступен на этом ресурсе в статье под тем же названием.