FF-228 - ffmpeg builder merge subtitle track now parses language codes

This commit is contained in:
John Andrews
2022-07-21 13:33:02 +12:00
parent 8b6fc37564
commit 3bc1acb420
3 changed files with 271 additions and 18 deletions

View File

@@ -67,16 +67,24 @@ public class FfmpegBuilderSubtitleTrackMerge : FfmpegBuilderNode
if (Subtitles.Contains(ext) == false)
continue;
string language = string.Empty;
if (MatchFilename)
{
bool matchesOriginal = FilenameMatches(args.FileName, file.FullName);
bool matchesWorking = FilenameMatches(args.WorkingFile, file.FullName);
string lang1, lang2;
bool matchesOriginal = FilenameMatches(args.FileName, file.FullName, out lang1);
bool matchesWorking = FilenameMatches(args.WorkingFile, file.FullName, out lang2);
if (matchesOriginal == false && matchesWorking == false)
continue;
if (string.IsNullOrEmpty(lang1) == false)
language = lang1;
if (string.IsNullOrEmpty(lang2) == false)
language = lang2;
}
args.Logger.ILog("Adding file: " + file.FullName + " [" + ext + "]");
args.Logger.ILog("Adding file: " + file.FullName + " [" + ext + "]" + (string.IsNullOrEmpty(language) == false ? " (Language: " + language + ")" : ""));
this.Model.InputFiles.Add(file.FullName);
this.Model.SubtitleStreams.Add(new FfmpegSubtitleStream
{
@@ -84,6 +92,7 @@ public class FfmpegBuilderSubtitleTrackMerge : FfmpegBuilderNode
{
InputFileIndex = this.Model.InputFiles.Count - 1,
TypeIndex = 0,
Language = language,
Title = file.Name.Replace(file.Extension, ""),
Codec = file.Extension[1..],
IndexString = (this.Model.InputFiles.Count - 1) + ":s:0"
@@ -98,8 +107,9 @@ public class FfmpegBuilderSubtitleTrackMerge : FfmpegBuilderNode
return count > 0 ? 1 : 2;
}
internal bool FilenameMatches(string input, string other)
internal bool FilenameMatches(string input, string other, out string languageCode)
{
languageCode = String.Empty;
var inputFile = new FileInfo(input);
string inputName = inputFile.Name.Replace(inputFile.Extension, "");
@@ -113,6 +123,24 @@ public class FfmpegBuilderSubtitleTrackMerge : FfmpegBuilderNode
{
string stripLang = Regex.Replace(otherName, @"(\.[a-zA-Z]{2,3}){1,2}$", string.Empty).Replace(" ", " ").Trim();
var rgxLanguage = new Regex("(?<=(\\.))(" + string.Join("|", LanguageCodes.Codes.Keys) + ")");
if (rgxLanguage.IsMatch(otherName))
{
string key = rgxLanguage.Match(otherName).Value;
languageCode = LanguageCodes.Codes[key];
}
if (string.IsNullOrEmpty(languageCode) == false)
{
if (Regex.IsMatch(otherName, @"\.hi(\.|$)"))
languageCode += " (HI)";
if (Regex.IsMatch(otherName, @"\.cc(\.|$)"))
languageCode += " (CC)";
if (Regex.IsMatch(otherName, @"\.sdh(\.|$)"))
languageCode += " (SDH)";
}
if (inputName.ToLowerInvariant().Equals(stripLang.ToLowerInvariant()))
return true;
}
@@ -121,6 +149,23 @@ public class FfmpegBuilderSubtitleTrackMerge : FfmpegBuilderNode
{
string stripLang = Regex.Replace(otherName, @"\([a-zA-Z]{2,3}\)", string.Empty).Replace(" ", " ").Trim();
var rgxLanguage = new Regex("(?<=(\\())(" + string.Join("|", LanguageCodes.Codes.Keys) + ")(?!=\\))");
if (rgxLanguage.IsMatch(otherName))
{
string key = rgxLanguage.Match(otherName).Value;
languageCode = LanguageCodes.Codes[key];
}
if (string.IsNullOrEmpty(languageCode) == false)
{
if (other.ToLower().Contains("(hi)"))
languageCode += " (HI)";
else if (other.ToLower().Contains("(cc)"))
languageCode += " (CC)";
else if (other.ToLower().Contains("(sdh)"))
languageCode += " (SDH)";
}
if (inputName.ToLowerInvariant().Equals(stripLang.ToLowerInvariant()))
return true;
}

199
VideoNodes/LanguageCodes.cs Normal file
View File

@@ -0,0 +1,199 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FileFlows.VideoNodes;
/// <summary>
/// Lookup table of two-letter language codes and their English display names.
/// </summary>
/// <remarks>
/// The keys look like ISO 639-1 codes (NOTE(review): confirm against the standard
/// before relying on completeness). FfmpegBuilderSubtitleTrackMerge joins
/// <see cref="Codes"/>.Keys with "|" to build a regex alternation, so every key must
/// stay free of regex metacharacters.
/// </remarks>
internal class LanguageCodes
{
    /// <summary>
    /// Maps a two-letter language code (e.g. "en") to its English name (e.g. "English").
    /// </summary>
    /// <remarks>
    /// The field is <c>readonly</c> so the shared table cannot be swapped out at runtime,
    /// and the key comparer is case-insensitive so "EN" and "en" resolve to the same entry.
    /// Use <c>TryGetValue</c> when the code may be unknown; the indexer throws for missing keys.
    /// </remarks>
    internal static readonly Dictionary<string, string> Codes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        { "aa", "Afar" },
        { "ab", "Abkhazian" },
        { "af", "Afrikaans" },
        { "ak", "Akan" },
        { "sq", "Albanian" },
        { "am", "Amharic" },
        { "ar", "Arabic" },
        { "an", "Aragonese" },
        { "hy", "Armenian" },
        { "as", "Assamese" },
        { "av", "Avaric" },
        { "ae", "Avestan" },
        { "ay", "Aymara" },
        { "az", "Azerbaijani" },
        { "ba", "Bashkir" },
        { "bm", "Bambara" },
        { "eu", "Basque" },
        { "be", "Belarusian" },
        { "bn", "Bengali" },
        { "bh", "Bihari languages" },
        { "bi", "Bislama" },
        { "bs", "Bosnian" },
        { "br", "Breton" },
        { "bg", "Bulgarian" },
        { "my", "Burmese" },
        { "ca", "Catalan; Valencian" },
        { "ch", "Chamorro" },
        { "ce", "Chechen" },
        { "zh", "Chinese" },
        { "cu", "Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic" },
        { "cv", "Chuvash" },
        { "kw", "Cornish" },
        { "co", "Corsican" },
        { "cr", "Cree" },
        { "cs", "Czech" },
        { "da", "Danish" },
        { "dv", "Divehi; Dhivehi; Maldivian" },
        { "nl", "Dutch; Flemish" },
        { "dz", "Dzongkha" },
        { "en", "English" },
        { "eo", "Esperanto" },
        { "et", "Estonian" },
        { "ee", "Ewe" },
        { "fo", "Faroese" },
        { "fj", "Fijian" },
        { "fi", "Finnish" },
        { "fr", "French" },
        { "fy", "Western Frisian" },
        { "ff", "Fulah" },
        { "ka", "Georgian" },
        { "de", "German" },
        { "gd", "Gaelic; Scottish Gaelic" },
        { "ga", "Irish" },
        { "gl", "Galician" },
        { "gv", "Manx" },
        { "el", "Greek, Modern (1453-)" },
        { "gn", "Guarani" },
        { "gu", "Gujarati" },
        { "ht", "Haitian; Haitian Creole" },
        { "ha", "Hausa" },
        { "he", "Hebrew" },
        { "hz", "Herero" },
        { "hi", "Hindi" },
        { "ho", "Hiri Motu" },
        { "hr", "Croatian" },
        { "hu", "Hungarian" },
        { "ig", "Igbo" },
        { "is", "Icelandic" },
        { "io", "Ido" },
        { "ii", "Sichuan Yi; Nuosu" },
        { "iu", "Inuktitut" },
        { "ie", "Interlingue; Occidental" },
        { "ia", "Interlingua (International Auxiliary Language Association)" },
        { "id", "Indonesian" },
        { "ik", "Inupiaq" },
        { "it", "Italian" },
        { "jv", "Javanese" },
        { "ja", "Japanese" },
        { "kl", "Kalaallisut; Greenlandic" },
        { "kn", "Kannada" },
        { "ks", "Kashmiri" },
        { "kr", "Kanuri" },
        { "kk", "Kazakh" },
        { "km", "Central Khmer" },
        { "ki", "Kikuyu; Gikuyu" },
        { "rw", "Kinyarwanda" },
        { "ky", "Kirghiz; Kyrgyz" },
        { "kv", "Komi" },
        { "kg", "Kongo" },
        { "ko", "Korean" },
        { "kj", "Kuanyama; Kwanyama" },
        { "ku", "Kurdish" },
        { "lo", "Lao" },
        { "la", "Latin" },
        { "lv", "Latvian" },
        { "li", "Limburgan; Limburger; Limburgish" },
        { "ln", "Lingala" },
        { "lt", "Lithuanian" },
        { "lb", "Luxembourgish; Letzeburgesch" },
        { "lu", "Luba-Katanga" },
        { "lg", "Ganda" },
        { "mk", "Macedonian" },
        { "mh", "Marshallese" },
        { "ml", "Malayalam" },
        { "mi", "Maori" },
        { "mr", "Marathi" },
        { "ms", "Malay" },
        { "mg", "Malagasy" },
        { "mt", "Maltese" },
        { "mn", "Mongolian" },
        { "na", "Nauru" },
        { "nv", "Navajo; Navaho" },
        { "nr", "Ndebele, South; South Ndebele" },
        { "nd", "Ndebele, North; North Ndebele" },
        { "ng", "Ndonga" },
        { "ne", "Nepali" },
        { "nn", "Norwegian Nynorsk; Nynorsk, Norwegian" },
        { "nb", "Bokmål, Norwegian; Norwegian Bokmål" },
        { "no", "Norwegian" },
        { "ny", "Chichewa; Chewa; Nyanja" },
        { "oc", "Occitan (post 1500); Provençal" },
        { "oj", "Ojibwa" },
        { "or", "Oriya" },
        { "om", "Oromo" },
        { "os", "Ossetian; Ossetic" },
        { "pa", "Panjabi; Punjabi" },
        { "fa", "Persian" },
        { "pi", "Pali" },
        { "pl", "Polish" },
        { "pt", "Portuguese" },
        { "ps", "Pushto; Pashto" },
        { "qu", "Quechua" },
        { "rm", "Romansh" },
        { "ro", "Romanian; Moldavian; Moldovan" },
        { "rn", "Rundi" },
        { "ru", "Russian" },
        { "sg", "Sango" },
        { "sa", "Sanskrit" },
        { "si", "Sinhala; Sinhalese" },
        { "sk", "Slovak" },
        { "sl", "Slovenian" },
        { "se", "Northern Sami" },
        { "sm", "Samoan" },
        { "sn", "Shona" },
        { "sd", "Sindhi" },
        { "so", "Somali" },
        { "st", "Sotho, Southern" },
        { "es", "Spanish; Castilian" },
        { "sc", "Sardinian" },
        { "sr", "Serbian" },
        { "ss", "Swati" },
        { "su", "Sundanese" },
        { "sw", "Swahili" },
        { "sv", "Swedish" },
        { "ty", "Tahitian" },
        { "ta", "Tamil" },
        { "tt", "Tatar" },
        { "te", "Telugu" },
        { "tg", "Tajik" },
        { "tl", "Tagalog" },
        { "th", "Thai" },
        { "bo", "Tibetan" },
        { "ti", "Tigrinya" },
        { "to", "Tonga (Tonga Islands)" },
        { "tn", "Tswana" },
        { "ts", "Tsonga" },
        { "tk", "Turkmen" },
        { "tr", "Turkish" },
        { "tw", "Twi" },
        { "ug", "Uighur; Uyghur" },
        { "uk", "Ukrainian" },
        { "ur", "Urdu" },
        { "uz", "Uzbek" },
        { "ve", "Venda" },
        { "vi", "Vietnamese" },
        { "vo", "Volapük" },
        { "cy", "Welsh" },
        { "wa", "Walloon" },
        { "wo", "Wolof" },
        { "xh", "Xhosa" },
        { "yi", "Yiddish" },
        { "yo", "Yoruba" },
        { "za", "Zhuang; Chuang" },
        { "zu", "Zulu" }
    };
}

View File

@@ -1224,22 +1224,31 @@ public class FfmpegBuilder_BasicTests
public void FfmpegBuilder_SubtitleTrackMerge_FileMatchesTests()
{
FfmpegBuilderSubtitleTrackMerge ffSubMerge = new();
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test.srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test.en.srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test(en).srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test (en).srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test.en.hi.srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test.en.sdh.srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test.en.cc.srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test.deu.srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test(deu).srt"));
Assert.IsTrue(ffSubMerge.FilenameMatches("Test.mkv", "test (deu).srt"));
foreach (var item in new[] {
("test.srt", "", true),
("test.en.srt", "English", true),
("test(en).srt", "English", true),
("test (en).srt", "English", true),
("test.en.hi.srt", "English (HI)", true),
("test.en.sdh.srt", "English (SDH)", true),
("test.en.cc.srt", "English (CC)", true),
("test.de.srt", "German", true),
("test(de).srt", "German", true),
("test (de).srt", "German", true),
("nomatch.srt", "", false),
("nomatch.en.srt", "English", false),
("nomatch(en).srt", "English", false),
("nomatch (en).srt", "English", false)
})
{
string lang;
bool isMatch = ffSubMerge.FilenameMatches("Test.mkv", item.Item1, out lang);
Assert.AreEqual(item.Item3, isMatch);
Assert.AreEqual(item.Item2, lang, "Language not matching in: " + item.Item1);
}
Assert.IsFalse(ffSubMerge.FilenameMatches("Test.mkv", "nomatch.srt"));
Assert.IsFalse(ffSubMerge.FilenameMatches("Test.mkv", "nomatch.en.srt"));
Assert.IsFalse(ffSubMerge.FilenameMatches("Test.mkv", "nomatch(en).srt"));
Assert.IsFalse(ffSubMerge.FilenameMatches("Test.mkv", "nomatch (en).srt"));
}
[TestMethod]