Commit 53ca29d5 by ana

### Adding a few scripts and images

parent c221890a
This image diff could not be displayed because it is too large. You can view the blob instead.
This image diff could not be displayed because it is too large. You can view the blob instead.
This image diff could not be displayed because it is too large. You can view the blob instead.
 #!/usr/bin/env python
# encoding=utf8

'''
This script calculates the ratio between 2 numbers by dividing one by the other
'''

import sys


def compute_ratio(num1, num2):
    '''Return num1 divided by num2 using true division.

    Raises:
        ZeroDivisionError: if num2 is 0.
    '''
    return num1 / num2


def main():
    '''Prompt for two integers on stdin and print their ratio.

    Exits with an error message (instead of a traceback) when an input is
    not an integer or when the second number is 0.
    '''
    try:
        # Each answer is converted right after it is read, so a bad first
        # value is reported before the second prompt appears.
        num1 = int(input('Enter the first number: '))
        num2 = int(input('Enter the second number: '))
    except ValueError:
        sys.exit('Please enter whole numbers only.')

    try:
        ratio12 = compute_ratio(num1, num2)
    except ZeroDivisionError:
        sys.exit('The second number must not be 0.')

    print('The ratio of', str(num1), 'and', str(num2), 'is', str(ratio12) + '.')


if __name__ == '__main__':
    main()
 #!/usr/bin/env python
# encoding=utf8

'''
This script takes one species name and a list of languages, fetches the
Wikipedia page of that species in each language and saves the page text in
separate textfiles.
'''

import wikipediaapi

species = "Passiflora ligularis"

languages = ['hr', 'su']
# Other language batches that can be swapped in:
# languages = ['en', 'es', 'fr', 'nl']
# languages = ['bn', 'kk', 'csb', 'fa', 'gd', 'hi']
# languages = ['hr', 'hsb', 'ht', 'ja', 'kn', 'lt' ]
# languages = ['ml', 'nn', 'oc', 'sa', 'sd', 'sh', 'ur']


def gettext(species, languages):
    '''Save the Wikipedia text of one page for each requested language.

    Args:
        species: page title to look up (the same title is used for every
            language).
        languages: iterable of Wikipedia language codes.

    For every language the page text is fetched; when it is non-empty it is
    written to "<species>_<language>.txt" in the current directory. The file
    holds the text length, a blank line, and then the text itself.

    Returns:
        None.
    '''
    for language in languages:
        print(species)
        wiki_wiki = wikipediaapi.Wikipedia(
            language=language,
            extract_format=wikipediaapi.ExtractFormat.WIKI)
        p_wiki = wiki_wiki.page(species)
        text = p_wiki.text
        length = len(text)
        title = species + "_" + language + ".txt"
        print('title:', title)
        print('length:', length)
        if text:
            # The page text is generally not ASCII; write UTF-8 explicitly so
            # the result does not depend on the platform's default encoding.
            with open(title, 'w', encoding='utf-8') as destination:
                destination.write(str(length))
                destination.write("\n\n")
                destination.write(text)


if __name__ == '__main__':
    gettext(species, languages)
 #!/usr/bin/env python
# encoding=utf8

'''
This script takes a list of names and different languages, fetches the
Wikipedia page of each name in each language and appends the length of the
page text to length_file.csv (one row per name/language pair).
'''

import wikipediaapi
import csv

species = ["Cinchona officinalis", "Juglans_nigra", "Elaeis guineensis",
           "Myristica fragrans", "Passiflora ligularis", "Eucalyptus regnans"]

# Because Wikipedia allows only a limited set of queries, it helps to launch
# the script several times for different languages
# languages = ['en', 'es', 'fr', 'nl']
# languages = ['ar', 'ast', 'azb', 'az']
# languages = ['ca', 'ceb', 'cs', 'csb']
# languages = ['da', 'de', 'el', 'eu']
# languages = ['eo', 'fa', 'fi', 'frr']
# languages = ['gd', 'gl', 'hi', 'hsb']
# languages = ['ht', 'hu', 'it', 'ja']
# languages = ['kk', 'ko', 'la', 'ln']
# languages = ['lt', 'ms', 'nn', 'no']
# languages = ['oc', 'pl', 'pt', 'qu']
# languages = ['ru', 'sa', 'sd', 'sh']
# languages = ['simple', 'sk', 'su', 'sv']
# languages = ['th', 'tr', 'uk', 'vi']
languages = ['war', 'zh-yue','zh']


def gettext(species, languages):
    '''Record the Wikipedia text length of every species/language pair.

    Args:
        species: iterable of page titles.
        languages: iterable of Wikipedia language codes; empty entries are
            skipped.

    For each pair the page text is fetched and a row
    [specie, language, length] is appended to "length_file.csv" in the
    current directory.

    Returns:
        None.
    '''
    for specie in species:
        for language in languages:
            if not language:
                continue
            wiki_wiki = wikipediaapi.Wikipedia(
                language=language,
                extract_format=wikipediaapi.ExtractFormat.WIKI)
            p_wiki = wiki_wiki.page(specie)
            text = p_wiki.text
            length = len(text)
            print('specie:', specie)
            print('language:', language)
            print('length:', length)
            # The file is reopened in append mode for every row, so rows
            # already written are kept if a later query fails.
            # newline='' is what the csv module requires of files it writes to.
            with open('length_file.csv', mode='a', newline='',
                      encoding='utf-8') as length_file:
                length_writer = csv.writer(
                    length_file, delimiter=',', quotechar='"',
                    quoting=csv.QUOTE_MINIMAL)
                length_writer.writerow([specie, language, length])


if __name__ == '__main__':
    gettext(species, languages)
 #!/usr/bin/env python
# encoding=utf8

'''
For each of the pagenames, this script gets all the links mentioned in that
page and writes them to "<pagename>.txt", one link per line.
This was necessary to get some idea of the different species represented on
Wikipedia.

Comments:
Trees of Africa refer to all countries listed here:
https://en.wikipedia.org/wiki/Ecoregions_of_Africa
'''

from bs4 import BeautifulSoup as bs
import requests

# TODO: add the other languages; only the English Wikipedia is queried so far.
# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]
BASE_URL = "https://en.wikipedia.org/wiki/"

# Seconds to wait for a response; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 30

pages = ["Trees_of_Africa", "Trees_of_Africa&pagefrom=Senegalia+ataxacantha#mw-pages",
         "Trees_of_Algeria", "Trees_of_Angola", "Trees_of_Botswana", "Trees_of_the_Democratic_Republic_of_the_Congo",
         "Trees_of_Ethiopia", "Trees_of_Ghana", "Trees_of_Madagascar", "Trees_of_Morocco",
         "Trees_of_R%C3%A9union", "Trees_of_Seychelles", "Trees_of_South_Africa"]


def wiki_links(soup):
    '''Collect the "/wiki/" links found in a parsed page.

    Args:
        soup: parsed document offering find_all("a").

    Returns:
        A list with, for every <a> whose href contains "/wiki/", that href
        with each "/wiki/" occurrence removed, in document order.
    '''
    species = []
    for link in soup.find_all("a"):
        url = link.get("href", "")
        print('url:', url)
        if "/wiki/" in url:
            species.append(url.replace("/wiki/", ""))
    return species


def main():
    '''Download every page in ``pages`` and save its links to a text file.'''
    for page in pages:
        wikipage = BASE_URL + page
        print("\npage:", wikipage)
        try:
            res = requests.get(wikipage, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # Report and move on so one unreachable page does not abort the
            # remaining downloads.
            print('request failed:', err)
            continue
        if not res:
            # A Response is falsy for 4xx/5xx status codes.
            print('skipped, HTTP status:', res.status_code)
            continue

        soup = bs(res.text, "html.parser")
        print('soup:', soup)
        species = wiki_links(soup)

        destination = page + ".txt"
        with open(destination, 'w', encoding='utf-8') as target:
            target.writelines(specie + '\n' for specie in species)


if __name__ == '__main__':
    main()
 #!/usr/bin/env python
# encoding=utf8

'''
This script takes a list of names and different languages for which it scrapes
the content of the pages and saves it in separate textfiles.
'''

import wikipediaapi

species = ["Cinchona officinalis", "Juglans_nigra", "Elaeis guineensis",
           "Myristica fragrans", "Passiflora ligularis", "Eucalyptus regnans"]

languages = ['en', 'es', 'fr', 'nl']

# Full list of language codes; not passed to gettext() below.
# 'su' and 'sv' are separate entries: without the comma between them the two
# adjacent string literals were concatenated into the single code 'susv'.
languages_all = ['en', 'es', 'fr', 'nl', 'ar', 'ast', 'azb', 'az', 'ca', 'ceb', 'cs', 'csb', 'da', 'de', 'el', 'eu', 'eo', 'fa',
                 'fi', 'frr', 'gd', 'gl', 'hi', 'hsb', 'ht', 'hu', 'it', 'ja', 'kk', 'ko', 'la', 'ln', 'lt', 'ms', 'nn', 'no',
                 'oc', 'pl', 'pt', 'qu', 'ru', 'sa', 'sd', 'sh', 'simple', 'sk', 'su',
                 'sv', 'th', 'tr', 'uk', 'vi', 'war', 'zh-yue', 'zh']


def gettext(species, languages):
    '''Save the Wikipedia text of every species/language pair to a file.

    Args:
        species: iterable of page titles.
        languages: iterable of Wikipedia language codes.

    For each pair the page text is fetched; when it is non-empty it is
    written to "<specie>_<language>.txt" in the current directory. The file
    holds the text length, a blank line, and then the text itself.

    Returns:
        None.
    '''
    for specie in species:
        for language in languages:
            print(specie)
            wiki_wiki = wikipediaapi.Wikipedia(
                language=language,
                extract_format=wikipediaapi.ExtractFormat.WIKI)
            p_wiki = wiki_wiki.page(specie)
            text = p_wiki.text
            length = len(text)
            title = specie + "_" + language + ".txt"
            print('title:', title)
            print('length:', length)
            if text:
                # The page text is generally not ASCII; write UTF-8 explicitly
                # so the result does not depend on the platform's default
                # encoding.
                with open(title, 'w', encoding='utf-8') as destination:
                    destination.write(str(length))
                    destination.write("\n\n")
                    destination.write(text)


if __name__ == '__main__':
    gettext(species, languages)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!