User:Neoconned/SourceWatchRefConverter

This has now been mothballed in favour of the SourceWatch:RefConverterBot project.


 * This is a temporary home for the code. When it's a bit more finished, it will be integrated into MediaWiki:Monobook.js.
 * This code is adapted and developed from Cyde Weys's ref converter: http://en.wikipedia.org/wiki/User:Cyde/Ref_converter. There are two purposes to the rewrite:
 * Handle "traditional" SourceWatch style referencing. This consists of a plain numbered link in the body of the article, e.g. [1], and a corresponding citation in the External Links section. The vast majority of SW articles still use this referencing style. Very few SW articles use the note/ref templates (which are what Cyde Weys's converter will convert).
 * Run in JavaScript rather than Perl. You'll therefore be able to run the converter from the Edit page when you edit an article.
 * The rewrite will take a while. Don't expect results soon.

The code
// This program converts (on MediaWiki wikis):
// * {{ref}} and {{note}} templates to Cite.php <ref> style.
// * Traditional SourceWatch style references to Cite.php <ref> style.
//
// Copyright (C) 2006 Ben "Cyde Weys" McIlwain
// Copyright (C) 2007 Neoconned (http://www.sourcewatch.org/index.php?title=User:Neoconned)
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
//
//

// Entry point of the converter.
//
// Takes the wiki source of an article, refuses to continue if it mentions
// 'mnb2', flattens every multi-line {{cite ...}} template onto a single line,
// collects the names used by the reference templates ({{mn}}, {{ref}},
// {{ref label}}, {{ref harv}}, {{ref harvard}}, {{ref num}}) and, when at
// least one was found, hands over to swrcArticleContainsSomeRefs.
//
// Parameters:
//   fullText - the complete wiki source of the article, as a string.
// Returns:
//   nothing (undefined) on every path.
//
// NOTE(review): numErrors, preLength, warnings, verbosage and matches are
// local to this function. The later stages refer to variables of the same
// names but are not handed them, so the results computed here are not yet
// visible to those stages. Decide how the state is to be shared (arguments
// or a shared object) when the remaining Perl is ported.
function swrcMain(fullText) {
    // Running count of problems found while converting.
    var numErrors = 0;

    // Length of the article before any changes are made to it.
    var preLength = fullText.length;

    // These two variables accumulate lines of text that are meant to be
    // output at the end.
    var warnings = "";
    var verbosage = "";

    // Neoconned: Not really sure what this is about...
    // (Un-ported Perl, kept for reference.)
    //Get rid of the "How to add a footnote" comment that this script makes superfluous.
    //if ($fullText =~ m/\<\!\-\-[^\n]*add[^\n]*footnote.*?\-\-\>/s) {
    //   $fullText =~ s/\<\!\-\-[^\n]*add[^\n]*footnote.*?\-\-\>//s;
    //   $warnings .= "Deleting comment on how to add old footnotes, make sure this was done correctly.\n";
    //}

    // Articles that mention 'mnb2' are treated as too broken to convert
    // automatically; bail out before touching anything.
    if (fullText.indexOf('mnb2') != -1) {
        alert("Panic, detecting, this article is most likely broken and will need manual repair.");
        return;
    }

    // Citation templates must sit on one line because they are later inserted
    // into the article text inline. Strip the newlines out of every
    // {{cite ...}} template. This also changes citation templates that are
    // not part of notes; that side effect is accepted.
    var tempText = fullText;
    fullText = fullText.replace(/(\{\{cite [^\{\}]*?\}\})/g, function (citeTemplate) {
        return citeTemplate.replace(/\n/g, "");
    });
    if (tempText !== fullText) {
        warnings += "Detecting multiple line cite, trying to fix, make sure I don't make any mistakes.\n";
    }

    // Collect the first parameter (the reference name) of every reference
    // template in the article, in order of appearance, duplicates included.
    var refPattern = /\{\{(?:mn|ref(?:[_ ]label|[_ ]harv|[_ ]harvard|[_ ]num)?)\s*\|\s*([^\|]*?)\s*(?:\|\s*[^\|\}]*?\s*)*?\}\}/gi;
    var matches = [];
    var found;
    // A successful match is never empty (it contains at least "{{" and "}}"),
    // so lastIndex always advances and the loop terminates.
    while ((found = refPattern.exec(fullText)) !== null) {
        matches.push(found[1]);
    }

    // If there are no reference templates in the article then there is no
    // point in continuing.
    if (matches.length > 0) {
        swrcArticleContainsSomeRefs();
    }
}

// Second stage of the converter: swrcMain invokes it once at least one
// reference template has been found in the article.
//
// STATUS: not yet ported. Apart from the `function` wrapper and the call to
// swrcFindTheNotes(fullText), the body is still Perl (my, %hash, foreach, =~,
// .=, elsif, print), and the declaration has no parameter-list parentheses,
// so this does not parse as JavaScript. Because the text has been collapsed
// onto long physical lines, everything that follows a `//` on the same line
// is also a comment as far as JavaScript is concerned.
//
// What the Perl does, in order:
//   1. Lower-cases the names in @matches and splits them into @matches
//      (unique names), @matchesMult (names seen more than once) and
//      @matchesSingle (names seen exactly once), each sorted.
//   2. Adds a warning when any name is referenced more than once.
//   3. Calls swrcFindTheNotes(fullText).
//   4. For each multiply referenced name that has non-blank note text in
//      %refCorrs, rewrites its templates in $finalText; a purely numeric
//      name is prefixed with 'ref' because Cite.php rejects integer names.
//      Blank or missing notes increment $numErrors and add a warning.
//   5. Turns every remaining reference template into {{citation needed}}.
//   6. Collapses runs of four or more newlines into two.
//   7. Warns if "{{ref", "{{note" or "{{mn" still occurs in $finalText.
//   8. Prints 'Finished. '.
//
// NOTE(review): fullText, @matches, $warnings, $verbosage and $numErrors are
// used here but are neither parameters of this function nor declared in it.
// NOTE(review): `$//matchesMult` looks like Perl's `$#matchesMult` whose `#`
// was mechanically rewritten to `//` - confirm against the Perl original.
// NOTE(review): `my %refCorrs = ;` has no right-hand side.
// NOTE(review): text appears to be missing from this copy. The handling of
// singly referenced names begins in the middle of its loop body, and several
// substitution replacements and messages look as if markup was stripped from
// them (for example the replacement `\/gi`). Restore these from the Perl
// original before porting.
// NOTE(review): the blank-note branch for multiply referenced names tests
// $currMatch against /^\s*$/, whereas the branch above it tests
// $refCorrs{$currMatch}; possibly the note text was meant - confirm.
function swrcArticleContainsSomeRefs {	   ////// This next little section creates @matchesSingle, which // consists of @matches minus // any duplicate entries, and @matchesMult, which // consists of a list of single entries // of things that did have duplicate entries. It also // removes duplicate entries from @matches. // In other words, if @matches was [a,a,b,c,d,d,e], then: // @matches = [a,b,c,d,e] // @matchesSingle = [b,c,e] // @matchesMult = [a,d]

my %tempHash; my %multHash; foreach (@matches) { //Note: lc turns all the characters of a string into //their lowercase counterparts._ if (exists $tempHash{lc($_)}) { $multHash{lc($_)} = lc($_); }		else { $tempHash{lc($_)} = lc($_); }	   }	    @matches = sort values %tempHash; my @matchesMult = sort values %multHash;

//Subtract set @matchesMult from set @matchesSingle foreach (@matchesMult) { delete $tempHash{$_}; }	   my @matchesSingle = sort values %tempHash;

//	   // End complicated section. //////

if ($//matchesMult >= 0) { $warnings .= "Detecting multiple refs with the same name, make sure I handle this correctly.\n"; }

//refCoors is the hash between ref name and note text. my %refCorrs = ; my $finalText = ""; my $firstMatch = 1; my $matched = 0;

swrcFindTheNotes(fullText);

my $currMatch = "";

// NOTE(review): the opening of the loop over singly referenced names is missing from the line below.
//Go through and replace references that were only referenced once with a simple into the article. $numErrors++; $warnings .= "Found a blank note, ref is \"$currMatch\"\n"; }		else { $numErrors++; $warnings .= "Ref \"$currMatch\" doesn\'t exist in notes. Turning into \{\{citation needed\}\}\n"; }	   }

//Now we need to go through and replace references that were referenced multiple times. //We need to name our references now. foreach $currMatch (@matchesMult) { if (exists $refCorrs{$currMatch} && $refCorrs{$currMatch} !~ m/^\s*$/) { //Cite.php returns an error if the refName is an integer value, so we'll pad it out with a character. my $refName = $currMatch; if ($refName =~ m/^\d+$/) { $refName = 'ref'. $refName; }		   $finalText =~ s/\{\{(?:mn|ref(?:[_ ]label|[_ ]harv|[_ ]harvard|[_ ]num)?)\s*\|\s*\Q$currMatch\E\s*(?:\|[^\|\}]*?\s*)*?\}\}/\$refCorrs{$currMatch}\<\/ref\>/i; $finalText =~ s/\{\{(?:mn|ref(?:[_ ]label|[_ ]harv|[_ ]harvard|[_ ]num)?)\s*\|\s*\Q$currMatch\E\s*(?:\|[^\|\}]*?\s*)*?\}\}/\/gi; $verbosage .= "Replacing multiply referenced \"$refName\" with full notes: \$refCorrs{$currMatch}\<\/ref\>\n"; }		elsif (exists $refCorrs{$currMatch} && $currMatch =~ m/^\s*$/) { //Deal with blank notes. We don't want to be inserting into the article. $numErrors++; $warnings .= "Found a blank multiply referenced note, ref is \"$currMatch\"\n"; }		else { $numErrors++; $warnings .= "Multiple reference \"$currMatch\" doesn\'t exist in notes. Turning into \{\{citation needed\}\}\n"; }	   }	    //One more loop through any remaining  tags to turn them into. $finalText =~ s/\{\{(?:mn|ref(?:[_ ]label|[_ ]harv|[_ ]harvard|[_ ]num)?)\s*\|\s*[^\|]*?\s*(?:\|[^\|\}]*?\s*)*?\}\}/\{\{citation needed\}\}/gi;

//Remove excess spaces that we may have just made by deleting the content inbetween. if ($finalText =~ m/\n{4,}/gs) { $warnings .= "I think I have found too many consecutive newlines, I am going to remove them, make sure I did this right.\n"; $finalText =~ s/\n{4,}/\n\n/gs; }

//Final sanity checks if ($finalText =~ m/\{\{ref/gi) { $warnings .= "Failing sanity check, there may still be some s left.\n"; }	   if ($finalText =~ m/\{\{note/gi) { $warnings .= "Failing sanity check, there may still be some s left.\n"; }	   if ($finalText =~ m/\{\{mn/gi) { $warnings .= "Failing sanity check, there may still be some Footnote4 stuff left ( or ).\n"; }

print 'Finished. ' . "\n";

}	else { } }

/*

swrcFindTheNotes function */


// swrcFindTheNotes(fullText)
//
// Walks the article source one line at a time and pairs note templates with
// the reference names collected earlier.
//
// Parameters:
//   fullText - the complete wiki source of the article; it is split on "\n".
//
// STATUS: only partly ported. The split, the for-loop header and the two
// `var` declarations are JavaScript; everything else is still Perl
// (foreach, =~, .=, my, eq). Because the text has been collapsed onto long
// physical lines, anything that follows a `//` on the same line, including
// the `function` header itself, is a comment as far as JavaScript is
// concerned.
//
// What the code does for each line:
//   * For every name in @matches, tests whether the line holds a matching
//     {{mnb}}, {{mnb2}}, {{note}} or {{note label}} template. The text after
//     the template, with any {{note label ...}} removed and surrounding
//     whitespace trimmed, is stored in %refCorrs under that name, and the
//     pairing is logged to $verbosage.
//   * On the first successful pairing ($firstMatch == 1) it may append a
//     line to $finalText, then clears $firstMatch.
//   * If nothing paired and the line still looks like an {{mnb}}, {{mnb2}}
//     or {{note}} template, a warning is added, $numErrors is incremented
//     and a line is appended to $finalText in place of the original.
//   * If nothing paired and the line is not a note, it is appended to
//     $finalText unchanged, followed by "\n".
//
// NOTE(review): the loop index `i` is assigned without `var`.
// NOTE(review): the JavaScript declares `thisLine` and `matched`, while the
// Perl statements read and write `$thisLine` and `$matched`.
// NOTE(review): @matches, %refCorrs, $firstMatch, $finalText, $warnings,
// $verbosage and $numErrors are not declared in this function, and
// $smallFont is not defined anywhere in the code visible here.
// NOTE(review): some literals look as if markup was stripped from them: the
// pattern `m/\/g`, the appended strings ' '. "\n" and " \n", and the "\n"
// appended for an unreferenced note, which the comment says should be a
// commented-out copy. Restore them from the Perl original before porting.
//Split the full Wiki source into discrete lines and //process them sequentially to see if //each line contains a or a. //If the line does contain a , //match it up in the hash with its appropriate //ref. If it doesn't match, throw a warning //and comment it out. If it did match, remove it, and //replace all removed s with a single function swrcFindTheNotes(fullText) {	var fullTextLines=fullText.split("\n");

for (i=0; i<fullTextLines.length; i++) {		var thisLine = fullTextLines[i]; var matched = 0;

//Loop through each of the ref names to see if it matches //with any notes on this line. This has O(n*m) efficiency. foreach (@matches) {		   if ($thisLine =~ m/\{\{(?:mnb2?|note(?:[_ ]label)?)\s*\|\s*\Q$_\E\s*(?:\|\s*[^\{\}]*?\s*)*\}\}\s*(.*)$/i) {				my $thisMatch = $1; if ($thisMatch =~ m/(\{\{note[_ ]label[^\}\{]*?\}\})/i) {					$thisMatch =~ s/\{\{note[_ ]label\s*[^\}\{]*?\}\}//gi; }

//Chop off leading and trailing spaces. $thisMatch =~ s/^\s+//; $thisMatch =~ s/\s+$//; $verbosage .= "Matching up ref \"$_\", removing from list, note is: $thisMatch\n"; $refCorrs{$_} = $thisMatch; $matched = 1; //firstMatch is used to keep track of the first note //that has been replaced. The first note is replaced //with and the rest are just deleted. if ($firstMatch == 1) {					if ($fullText !~ m/\/g) {						if ($smallFont eq "on") {							$finalText .= ' '. "\n"; }						else {							$finalText .= " \n"; }					}					$firstMatch = 0; }		   }		}

//If this line had a note with no corresponding ref, comment //it out and print a warning message. if ($matched == 0) {		   if ($thisLine =~ m/\{\{(?:mnb2?|note)\s*\|\s*([^\|]*?)\s*\|?\s*\}\}\s*(.*)$/i) {				$warnings .= "Note \"$1\" isn\'t referenced, commenting out, link was: $2\n"; $numErrors++; $finalText .= "\n"; }		   else {				$finalText = $finalText. $thisLine. "\n"; }		}   } }