Cognitive Services
Text Translation
Create a Cognitive Services resource of type TextTranslation:
Import-Module Az.CognitiveServices
$resourceGroup = "rg-Translate"   # assumes this resource group already exists
$AccountName = "acs-translation"
# Create the Translator resource on the free tier (F0)
New-AzCognitiveServicesAccount -ResourceGroupName $resourceGroup -Name $AccountName -Type TextTranslation -SkuName F0 -Location westeurope
Start-Sleep -s 15
# Restrict network access to the given IP ranges
Add-AzCognitiveServicesAccountNetworkRule -ResourceGroupName $resourceGroup -Name $AccountName -IpAddressOrRange "200.0.0.0/24","28.2.0.0/16"
# Print the values needed to call the REST API
Write-Host("Ocp-Apim-Subscription-Key: " + (Get-AzCognitiveServicesAccountKey -ResourceGroupName $resourceGroup -Name $AccountName).Key1)
Write-Host("Ocp-Apim-Subscription-Region: " + (Get-AzCognitiveServicesAccount -ResourceGroupName $resourceGroup -Name $AccountName).Location)
Write-Host("Endpoint: " + (Get-AzCognitiveServicesAccount -ResourceGroupName $resourceGroup -Name $AccountName).Endpoint + "translate")
Test the translation service with this Python script:
import requests, uuid

# Input variables (get these from your Azure Translator resource)
location = "westeurope"
key = ""
url = "https://api.cognitive.microsofttranslator.com/translate"

# Translate text into various languages
def translate(text: str) -> dict:
    body = [{'text': text}]
    # Supported language codes: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/language-support
    params = {'api-version': '3.0', 'from': 'en', 'to': ['de', 'ru', 'yue', 'tlh-Latn', 'ko', 'ku', 'mn-Cyrl', 'mn-Mong', 'ne', 'th', 'yua', 'ja', 'el']}
    trace_id = str(uuid.uuid4())
    headers = {'Ocp-Apim-Subscription-Key': key, 'Ocp-Apim-Subscription-Region': location, 'Content-type': 'application/json', 'X-ClientTraceId': trace_id}
    request = requests.post(url, params=params, headers=headers, json=body)
    result = request.json()[0]['translations']
    translations = {}
    print("-----------------------------------------------\n| ClientTraceId:\t" + trace_id)
    print("| Chars to translate:\t" + str(len(text)))
    print("| Input:\t\t" + text + "\n-----------------------------------------------")
    for language in result:
        print(language['to'] + "\t" + language['text'])
        translations[language['to']] = language['text']
    return translations

translate("This is a test azure translation test!")
Import this into Postman and add a valid Ocp-Apim-Subscription-Key to make the following example work:
{
"info": {
"_postman_id": "e9ea56cc-e63d-468a-a88a-5d0a3761faab",
"name": "MS Translator",
"schema": < https: //schema.getpostman.com/json/collection/v2.1.0/collection.json>
},
"item": [{
"name": "Translate from EN to various",
"protocolProfileBehavior": {
"disabledSystemHeaders": {
"content-length": true
}
},
"request": {
"auth": {
"type": "apikey",
"apikey": [{
"key": "value",
"value": "",
"type": "string"
},
{
"key": "key",
"value": "Ocp-Apim-Subscription-Key",
"type": "string"
}
]
},
"method": "POST",
"header": [{
"key": "Content-Type",
"value": "application/json",
"type": "text"
},
{
"key": "Ocp-Apim-Subscription-Region",
"value": "westeurope",
"type": "text"
}
],
"body": {
"mode": "raw",
"raw": "[{'Text':'Hello, what is your name?'}]",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": < https: //api.cognitive.microsofttranslator.com/translate?api-version=3.0&from=en&to=de&to=ru&to=yue&to=tlh-Latn&to=ko&to=ku>,
"protocol": "https",
"host": [
"api",
"cognitive",
"microsofttranslator",
"com"
],
"path": [
"translate"
],
"query": [{
"key": "api-version",
"value": "3.0"
},
{
"key": "from",
"value": "en"
},
{
"key": "to",
"value": "de"
},
{
"key": "to",
"value": "ru"
},
{
"key": "to",
"value": "yue"
},
{
"key": "to",
"value": "tlh-Latn"
},
{
"key": "to",
"value": "ko"
},
{
"key": "to",
"value": "ku"
},
{
"key": "to",
"value": "mn-Mong"
},
{
"key": "to",
"value": "ne"
},
{
"key": "to",
"value": "th"
},
{
"key": "to",
"value": "yua"
}
]
}
},
"response": []
}]
}
Speech Service
Azure offers a service to convert text to speech and speech to text. Here are some examples of how this works.
I followed this guide: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/index-speech-to-text.
Here are the steps I took to make it work.
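The steps below cover the speech-to-text direction. For text to speech, the service also exposes a plain REST endpoint; a minimal PowerShell sketch, assuming a Speech resource in westeurope and the en-US-JennyNeural voice (substitute your own key, region and voice):
$speechKey = "<your Speech resource key>"
$ssml = "<speak version='1.0' xml:lang='en-US'><voice name='en-US-JennyNeural'>Hello from Azure!</voice></speak>"
$headers = @{
    'Ocp-Apim-Subscription-Key' = $speechKey
    'Content-Type'              = 'application/ssml+xml'
    'X-Microsoft-OutputFormat'  = 'riff-16khz-16bit-mono-pcm'
}
# Writes the synthesized audio to hello.wav
Invoke-WebRequest -Method Post -Uri 'https://westeurope.tts.speech.microsoft.com/cognitiveservices/v1' -Headers $headers -Body $ssml -OutFile 'hello.wav'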
Convert input to wav
Make sure the input is a WAV file. You can use FFmpeg for this task, e.g.:
ffmpeg -i input.mp4 -vn -acodec pcm_s16le -ar 44100 -ac 2 output.wav
For more options, see my FFmpeg notes or visit https://ffmpeg.org/documentation.html
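If you have several recordings, a small PowerShell loop can batch-convert them. A sketch, assuming ffmpeg is on the PATH and the inputs are .mp4 files in the current directory:
# Convert every .mp4 in the current directory to a .wav with the same base name
Get-ChildItem -Filter *.mp4 | ForEach-Object {
    ffmpeg -i $_.FullName -vn -acodec pcm_s16le -ar 44100 -ac 2 ($_.BaseName + ".wav")
}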
Install required tools
Install-Package Microsoft.CognitiveServices.Speech # If not possible, run: Register-PackageSource -Name MyNuGet -Location https://www.nuget.org/api/v2 -ProviderName NuGet
Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))
choco install -y vscode # vscode
choco install -y dotnet # .NET
Start-Process "vscode:extension/ms-dotnettools.csharp"
dotnet add package Microsoft.CognitiveServices.Speech # Speech package (run this inside the project directory created in the next step)
Create a new C# project
New-Item -Path SpeechTest -Type Directory
cd SpeechTest
dotnet new console
Create a new Azure Speech service resource
I created this manually as I was too lazy to script it: fill out the form at https://portal.azure.com/#create/Microsoft.CognitiveServicesAllInOne and you are ready to go. If you prefer to script it after all, see the sketch below.
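A scripted variant using the same cmdlet as in the Translation section; a sketch, assuming a resource group rg-Speech already exists and the F0 tier is available for your subscription (the resource names here are placeholders):
# Create the Speech resource (kind SpeechServices) on the free tier
New-AzCognitiveServicesAccount -ResourceGroupName "rg-Speech" -Name "acs-speech" -Type SpeechServices -SkuName F0 -Location westeurope
# Key1 is what goes into SpeechConfig.FromSubscription below
(Get-AzCognitiveServicesAccountKey -ResourceGroupName "rg-Speech" -Name "acs-speech").Key1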
Code of Program.cs
The program is not optimized and is kept simple; it is based on the example provided by the Microsoft guide. Change the parts marked with comments to make it work for you.
using System;
using System.IO;
using System.Threading.Tasks;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

namespace SpeechTest
{
    class Program
    {
        async static Task Main(string[] args)
        {
            var speechConfig = SpeechConfig.FromSubscription("Cognitive Service API KEY1", "WestEurope"); // change
            speechConfig.SpeechRecognitionLanguage = "de-de"; // change
            using var audioConfig = AudioConfig.FromWavFileInput("Path\\File.wav"); // change
            using (var recognizer = new SpeechRecognizer(speechConfig, audioConfig))
            {
                // Partial results while a phrase is still being recognized
                recognizer.Recognizing += (s, e) => { Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}"); };
                // Final result for a completed phrase
                recognizer.Recognized += (s, e) =>
                {
                    var result = e.Result;
                    Console.WriteLine($"Reason: {result.Reason}");
                    switch (result.Reason)
                    {
                        case ResultReason.RecognizedSpeech:
                            Console.WriteLine($"Final result: Text: {result.Text}.");
                            File.AppendAllText("Output.txt", Environment.NewLine);
                            File.AppendAllText("Output.txt", result.Text);
                            break;
                        case ResultReason.NoMatch:
                            Console.WriteLine("NOMATCH: Speech could not be recognized.");
                            break;
                        case ResultReason.Canceled:
                            var cancellation = CancellationDetails.FromResult(result);
                            Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
                            if (cancellation.Reason == CancellationReason.Error)
                            {
                                Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                                Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                                Console.WriteLine("CANCELED: Did you update the subscription info?");
                            }
                            break;
                    }
                };
                recognizer.Canceled += (s, e) => { Console.WriteLine($"\n Canceled. Reason: {e.Reason}"); };
                recognizer.SessionStarted += (s, e) => { Console.WriteLine("\n Session started event."); };
                recognizer.SessionStopped += (s, e) => { Console.WriteLine("\n Session stopped event."); };

                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
                do
                {
                    Console.WriteLine("Press Enter to stop");
                } while (Console.ReadKey().Key != ConsoleKey.Enter);
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }
        }
    }
}
Run the code with:
dotnet run
This example output shows how the program first recognizes word for word and, once the sentence is over, commits the sentence. Sometimes the content of the sentence changes based on the following word(s). The example analyzes the story of Nils Holgersson in German: